use logos::{Lexer, Logos, SpannedIter};
use crate::{ParseError, ParseErrorKind, SourceFile};
// Declares the `grammar` module from the LALRPOP output file `/grammar.rs`;
// `parse_source` below uses its `SourceFileParser`.
lalrpop_util::lalrpop_mod!(grammar, "/grammar.rs");
/// Module-local `Result` alias whose error type defaults to [`ParseError`].
type Result<T, E = ParseError> = std::result::Result<T, E>;
/// Token of the top-level ("normal") lexing mode.
///
/// This is also the common token type of the output vector: tokens lexed by
/// the brace and string sub-modes (`TokenBrace`, `TokenDString`,
/// `TokenSString`) are converted into `Token` through their `From` impls
/// before being stored.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Logos)]
// Skipped between tokens: runs of space, tab, vertical tab, form feed, and
// `#` comments up to (but not including) the end of the line.
#[logos(skip r"([ \t\v\f]|#[^\r\n]*)+")]
enum Token<'i> {
/// Line break, optionally preceded by a carriage return.
#[regex("\r?\n")]
Newline,
// --- Keywords ---
#[token("if")]
If,
#[token("else")]
Else,
#[token("for")]
For,
#[token("in")]
In,
#[token("while")]
While,
#[token("break")]
Break,
#[token("continue")]
Continue,
#[token("function")]
Function,
#[token("return")]
Return,
#[token("begin")]
Begin,
#[token("end")]
End,
#[token("switch")]
Switch,
#[token("case")]
Case,
#[token("and")]
And,
#[token("or")]
Or,
#[token("not")]
Not,
// --- Punctuation and operators ---
#[token("(")]
LParen,
#[token(")")]
RParen,
#[token("{")]
LBrace,
#[token("}")]
RBrace,
#[token("|")]
Pipe,
#[token("&|")]
AmpPipe,
#[token(";")]
Semi,
#[token("*")]
Star,
#[token("**")]
StarStar,
#[token("~")]
Tilde,
#[token("~/")]
TildeSlash,
#[token("||")]
PipePipe,
#[token("&&")]
AmpAmp,
#[token("&>")]
AmpGt,
// --- Redirection-style operators ---
// Each pattern is an optional run of digits followed by the operator.
// No callback is given, so the payload is presumably the whole matched
// slice, digits included — confirm against logos' default for `&str` fields.
// NOTE(review): the digits look like a file-descriptor number — confirm
// against the grammar.
#[regex(r"\d*<")]
Lt(&'i str),
#[regex(r"\d*<\?")]
LtQus(&'i str),
#[regex(r"\d*<&")]
LtAmp(&'i str),
#[regex(r"\d*>")]
Gt(&'i str),
#[regex(r"\d*>\?")]
GtQus(&'i str),
#[regex(r"\d*>&")]
GtAmp(&'i str),
#[regex(r"\d*>>")]
GtGt(&'i str),
#[regex(r"\d*>\|")]
GtPipe(&'i str),
/// A piece of bare word text. Three patterns produce it:
/// a run of word characters and the listed punctuation (taken as-is);
/// a backslash immediately followed by a line break (lexed as the empty
/// string); and a backslash followed by one other character (translated by
/// `unescape_word_single`).
#[regex(r"[\w%+,\-./=@^:!?\[\]]+")]
#[regex(r#"\\\r?\n"#, |_| "")]
#[regex(r#"\\."#, unescape_word_single)]
Word(&'i str),
/// Numeric character escape: `\ooo` (three octal digits), `\xHH`, `\uHHHH`
/// or `\UHHHHHHHH`, decoded by `unescape_unicode`. The callbacks return
/// `Option<char>`; `None` means the number is not a valid `char`.
#[regex(r"\\[0-7]{3}", |lex| unescape_unicode::<8>(&lex.slice()[1..]))]
#[regex(
r"\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}",
|lex| unescape_unicode::<16>(&lex.slice()[2..])
)]
Escape(char),
/// Double quote; `lex_normal` switches to the double-quoted string mode
/// after pushing it.
#[token("\"")]
DQuote,
/// One or more `$` followed by one or more word characters.
#[regex(r#"\$+\w+"#)]
Variable(&'i str),
/// `$(`; `lex_normal` lexes what follows in a nested normal-mode pass.
#[token("$(")]
DollarLParen,
/// Literal text. Has no pattern in this mode; it only arises from
/// converting the `Verbatim` variants of the brace and string modes.
Verbatim(&'i str),
/// Single quote; `lex_normal` switches to the single-quoted string mode
/// after pushing it.
#[token("'")]
SQuote,
/// Zero-width marker. Has no pattern; `lex_normal` inserts it between two
/// directly adjacent tokens that are allowed to join.
Join,
/// Comma separator. Has no pattern in this mode (a `,` here is part of the
/// `Word` character class); it only arises from converting `TokenBrace::Comma`.
Comma,
}
impl Token<'_> {
    /// Variants accepted by both [`Self::can_join_left`] and
    /// [`Self::can_join_right`].
    fn can_join_either_side(self) -> bool {
        matches!(
            self,
            Token::Word(_)
                | Token::Escape(_)
                | Token::Variable(_)
                | Token::Star
                | Token::Tilde
                | Token::TildeSlash
                | Token::SQuote
                | Token::DQuote
        )
    }

    /// Whether a `Join` may be placed directly before this token.
    ///
    /// `lex_normal` asks this of the token it has just lexed. Besides the
    /// shared set, the openers `$(` and `{` qualify.
    fn can_join_left(self) -> bool {
        self.can_join_either_side() || matches!(self, Token::DollarLParen | Token::LBrace)
    }

    /// Whether a `Join` may be placed directly after this token.
    ///
    /// `lex_normal` asks this of the previously emitted token. Besides the
    /// shared set, the closers `)` and `}` qualify.
    fn can_join_right(self) -> bool {
        self.can_join_either_side() || matches!(self, Token::RParen | Token::RBrace)
    }
}
/// Parses `s` as an unsigned number in base `RADIX` and converts it to the
/// `char` with that code point.
///
/// Returns `None` when `s` is not a valid base-`RADIX` number that fits in a
/// `u32`, or when the number is not a valid Unicode scalar value (for example
/// a surrogate or a value above `0x10FFFF`).
fn unescape_unicode<const RADIX: u32>(s: &str) -> Option<char> {
    u32::from_str_radix(s, RADIX)
        .ok()
        .and_then(char::from_u32)
}
/// Logos callback for the `\\.` pattern of `Token::Word`: translates a
/// backslash escape made of a backslash plus one character.
///
/// `\a`, `\e`, `\n` and `\r` become BEL, ESC, line feed and carriage return.
/// Every other escaped character (including `\\`, `\"` and `\'`) stands for
/// itself.
///
/// NOTE(review): `\t` has no dedicated entry, so it yields a plain `t` —
/// confirm that this is intended.
fn unescape_word_single<'i>(lex: &mut Lexer<'i, Token<'i>>) -> &'i str {
    // Drop the leading backslash (always one byte) to get the escaped character.
    let escaped = &lex.slice()[1..];
    match escaped.as_bytes() {
        b"a" => "\x07",
        b"e" => "\x1b",
        b"n" => "\n",
        b"r" => "\r",
        b"\\" => "\\",
        b"\"" => "\"",
        b"'" => "'",
        _ => escaped,
    }
}
/// Token of the brace lexing mode, which `lex_brace` runs after a `{` has
/// been seen.
///
/// No skip pattern is declared for this mode, so whitespace is not dropped;
/// it is matched by `Verbatim` like any other ordinary character.
///
/// NOTE(review): a `$` that starts neither `$(` nor a variable, and a
/// backslash that does not start one of the numeric escapes below, appear to
/// match no pattern here and would therefore be reported as lex errors —
/// confirm that this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Logos)]
enum TokenBrace<'i> {
/// Opens a nested brace group.
#[token("{")]
LBrace,
/// Closes the current brace group; `lex_brace` stops at it.
#[token("}")]
RBrace,
#[token("\"")]
DQuote,
#[token("'")]
SQuote,
#[token("$(")]
DollarLParen,
/// One or more `$` followed by one or more word characters.
#[regex(r"\$+\w+")]
Variable(&'i str),
#[token(",")]
Comma,
/// Run of characters other than `{`, `,`, `}`, `"`, `'`, `$` and backslash.
#[regex(r#"[^{,}"'$\\]+"#)]
Verbatim(&'i str),
/// Numeric character escape: `\ooo` (three octal digits), `\xHH`, `\uHHHH`
/// or `\UHHHHHHHH`, decoded by `unescape_unicode`.
#[regex(r"\\[0-7]{3}", |lex| unescape_unicode::<8>(&lex.slice()[1..]))]
#[regex(
r"\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}",
|lex| unescape_unicode::<16>(&lex.slice()[2..])
)]
Escape(char),
}
impl<'i> From<TokenBrace<'i>> for Token<'i> {
    /// Maps each brace-mode token onto the `Token` variant of the same name,
    /// carrying any payload over unchanged.
    fn from(tok: TokenBrace<'i>) -> Self {
        match tok {
            // Variants with a payload.
            TokenBrace::Verbatim(text) => Self::Verbatim(text),
            TokenBrace::Variable(var) => Self::Variable(var),
            TokenBrace::Escape(ch) => Self::Escape(ch),
            // Punctuation.
            TokenBrace::LBrace => Self::LBrace,
            TokenBrace::RBrace => Self::RBrace,
            TokenBrace::Comma => Self::Comma,
            TokenBrace::DollarLParen => Self::DollarLParen,
            TokenBrace::SQuote => Self::SQuote,
            TokenBrace::DQuote => Self::DQuote,
        }
    }
}
/// Token of the double-quoted string mode, which `lex_dstring` runs after an
/// opening `"` has been seen.
///
/// NOTE(review): a `$` that starts neither `$(` nor a variable appears to
/// match no pattern here, and `\\.` presumably does not match a backslash
/// directly followed by a line break (assuming `.` excludes `\n`, the usual
/// regex default) — both would then be lex errors; confirm this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Logos)]
enum TokenDString<'i> {
/// Closing quote; `lex_dstring` stops at it.
#[token("\"")]
DQuote,
/// One or more `$` followed by one or more word characters.
#[regex(r#"\$+\w+"#)]
Variable(&'i str),
#[token("$(")]
DollarLParen,
/// Literal text: either a run of characters other than `"`, backslash and
/// `$`, or a backslash plus one character translated by `unescape_dquote`.
#[regex(r#"[^"\\$]+"#)]
#[regex(r"\\.", unescape_dquote)]
Verbatim(&'i str),
}
impl<'i> From<TokenDString<'i>> for Token<'i> {
    /// Maps each double-quoted-string token onto the `Token` variant of the
    /// same name, carrying any payload over unchanged.
    fn from(tok: TokenDString<'i>) -> Self {
        match tok {
            TokenDString::Verbatim(text) => Self::Verbatim(text),
            TokenDString::Variable(var) => Self::Variable(var),
            TokenDString::DollarLParen => Self::DollarLParen,
            TokenDString::DQuote => Self::DQuote,
        }
    }
}
/// Logos callback for the `\\.` pattern of `TokenDString::Verbatim`.
///
/// `\\`, `\"` and `\$` are reduced to the escaped character. Any other
/// matched text is returned unchanged, backslash included.
fn unescape_dquote<'i>(lex: &mut Lexer<'i, TokenDString<'i>>) -> &'i str {
    let raw = lex.slice();
    match raw.strip_prefix('\\') {
        Some("\\") => "\\",
        Some("\"") => "\"",
        Some("$") => "$",
        // Not one of the recognised escapes: keep the text exactly as written.
        _ => raw,
    }
}
/// Token of the single-quoted string mode, which `lex_sstring` runs after an
/// opening `'` has been seen.
///
/// NOTE(review): `\\.` presumably does not match a backslash directly
/// followed by a line break (assuming `.` excludes `\n`, the usual regex
/// default), which would then be a lex error — confirm this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Logos)]
enum TokenSString<'i> {
/// Literal text: either a run of characters other than `'` and backslash,
/// or a backslash plus one character translated by `unescape_squote`.
#[regex(r"[^'\\]+")]
#[regex(r"\\.", unescape_squote)]
Verbatim(&'i str),
/// Closing quote; `lex_sstring` stops at it.
#[token("'")]
SQuote,
}
/// Logos callback for the `\\.` pattern of `TokenSString::Verbatim`.
///
/// Only `\'` and `\\` are reduced to the escaped character. Any other matched
/// text is returned unchanged, backslash included.
fn unescape_squote<'i>(lex: &mut Lexer<'i, TokenSString<'i>>) -> &'i str {
    let raw = lex.slice();
    match raw.strip_prefix('\\') {
        Some("'") => "'",
        Some("\\") => "\\",
        // Not one of the recognised escapes: keep the text exactly as written.
        _ => raw,
    }
}
impl<'i> From<TokenSString<'i>> for Token<'i> {
    /// Maps each single-quoted-string token onto the `Token` variant of the
    /// same name, carrying any payload over unchanged.
    fn from(tok: TokenSString<'i>) -> Self {
        match tok {
            TokenSString::SQuote => Self::SQuote,
            TokenSString::Verbatim(text) => Self::Verbatim(text),
        }
    }
}
/// A logos token stream for one lexing mode (`Tok`) paired with the output
/// vector that all modes write into.
struct MyLexer<'i, 'o, Tok: Logos<'i>> {
/// Spanned logos iterator over the input, producing tokens of type `Tok`.
iter: SpannedIter<'i, Tok>,
/// Destination for lexed tokens, stored as `(start, token, end)` triples.
out: &'o mut Vec<(usize, Token<'i>, usize)>,
}
impl<'i, Tok: Logos<'i, Extras = ()> + Into<Token<'i>> + Copy> MyLexer<'i, '_, Tok> {
    /// Pulls the next token from the underlying logos iterator.
    ///
    /// Returns `Ok(None)` once the iterator is exhausted, otherwise the token
    /// as a `(start, token, end)` triple.
    ///
    /// # Errors
    /// A logos error is turned into a `ParseErrorKind::Lex` error covering
    /// the offending span.
    fn next(&mut self) -> Result<Option<(usize, Tok, usize)>> {
        self.iter
            .next()
            .map(|(lexed, span)| match lexed {
                Ok(tok) => Ok((span.start, tok, span.end)),
                Err(_) => Err(ParseError::new(span.start, span.end, ParseErrorKind::Lex)),
            })
            .transpose()
    }

    /// Like [`Self::next`], but also appends the token (converted to `Token`)
    /// to the output vector and returns only the token itself.
    fn next_pushed(&mut self) -> Result<Option<Tok>> {
        let lexed = self.next()?;
        if let Some((start, tok, end)) = lexed {
            self.out.push((start, tok.into(), end));
        }
        Ok(lexed.map(|(_, tok, _)| tok))
    }

    /// Runs `f` on a temporary lexer for token type `Tok2`.
    ///
    /// The temporary lexer is created by cloning and morphing the current
    /// logos lexer, and it writes into the same output vector. Once `f`
    /// returns, this lexer is replaced by the temporary one morphed back to
    /// `Tok`, so lexing continues from wherever `f` left off. The value
    /// returned by `f` is passed through.
    fn with_morphed<Tok2, T, F>(&mut self, f: F) -> T
    where
        Tok2: Logos<'i, Source = Tok::Source, Extras = ()> + Copy,
        F: FnOnce(&mut MyLexer<'i, '_, Tok2>) -> T,
    {
        let mut inner = MyLexer {
            iter: (*self.iter).clone().morph().spanned(),
            out: self.out,
        };
        let outcome = f(&mut inner);
        // Adopt the sub-lexer's state before handing the result back.
        self.iter = (*inner.iter).clone().morph().spanned();
        outcome
    }
}
/// Lexes the inside of a brace group, pushing every token to the output.
///
/// Stops after the closing `}` has been pushed, or when the input ends.
/// Nested `{`, `$(`, `'` and `"` hand over to the matching lexing mode and
/// resume here afterwards.
///
/// # Errors
/// Propagates any lex error from this mode or a nested one.
fn lex_brace<'i>(lexer: &mut MyLexer<'i, '_, TokenBrace<'i>>) -> Result<()> {
    loop {
        match lexer.next_pushed()? {
            // End of input or the closing brace: this group is done.
            None | Some(TokenBrace::RBrace) => return Ok(()),
            Some(TokenBrace::LBrace) => lexer.with_morphed(lex_brace)?,
            Some(TokenBrace::DollarLParen) => lexer.with_morphed(lex_normal)?,
            Some(TokenBrace::SQuote) => lexer.with_morphed(lex_sstring)?,
            Some(TokenBrace::DQuote) => lexer.with_morphed(lex_dstring)?,
            // Already pushed; nothing else to do.
            Some(TokenBrace::Variable(_))
            | Some(TokenBrace::Escape(_))
            | Some(TokenBrace::Verbatim(_))
            | Some(TokenBrace::Comma) => {}
        }
    }
}
/// Lexes the inside of a single-quoted string, pushing every token to the
/// output.
///
/// Stops after the closing `'` has been pushed, or when the input ends.
///
/// # Errors
/// Propagates any lex error.
fn lex_sstring<'i>(lexer: &mut MyLexer<'i, '_, TokenSString<'i>>) -> Result<()> {
    loop {
        match lexer.next_pushed()? {
            None | Some(TokenSString::SQuote) => return Ok(()),
            Some(TokenSString::Verbatim(_)) => {}
        }
    }
}
/// Lexes the inside of a double-quoted string, pushing every token to the
/// output.
///
/// Stops after the closing `"` has been pushed, or when the input ends. A
/// `$(` hands over to normal mode and resumes here afterwards.
///
/// # Errors
/// Propagates any lex error from this mode or a nested one.
fn lex_dstring<'i>(lexer: &mut MyLexer<'i, '_, TokenDString<'i>>) -> Result<()> {
    loop {
        match lexer.next_pushed()? {
            None | Some(TokenDString::DQuote) => return Ok(()),
            Some(TokenDString::DollarLParen) => lexer.with_morphed(lex_normal)?,
            Some(TokenDString::Variable(_)) | Some(TokenDString::Verbatim(_)) => {}
        }
    }
}
/// Lexes in normal mode, pushing every token to the output.
///
/// Before a token is pushed, a zero-width `Token::Join` is inserted when the
/// token starts exactly where the previously pushed token ended and both
/// sides allow joining. Quotes, `{`, `$(` and `(` hand over to the matching
/// lexing mode and resume here afterwards.
///
/// Stops after a `)` has been pushed, or when the input ends.
///
/// # Errors
/// Propagates any lex error from this mode or a nested one.
fn lex_normal<'i>(lexer: &mut MyLexer<'i, '_, Token<'i>>) -> Result<()> {
    while let Some((start, tok, end)) = lexer.next()? {
        let glue_to_previous = tok.can_join_left()
            && lexer.out.last().map_or(false, |&(_, prev, prev_end)| {
                prev_end == start && prev.can_join_right()
            });
        if glue_to_previous {
            lexer.out.push((start, Token::Join, start));
        }
        lexer.out.push((start, tok, end));

        match tok {
            // Closes the current nesting level.
            Token::RParen => return Ok(()),
            Token::DollarLParen | Token::LParen => lexer.with_morphed(lex_normal)?,
            Token::LBrace => lexer.with_morphed(lex_brace)?,
            Token::DQuote => lexer.with_morphed(lex_dstring)?,
            Token::SQuote => lexer.with_morphed(lex_sstring)?,
            _ => {}
        }
    }
    Ok(())
}
/// Lexes the whole of `src` into `(start, token, end)` triples.
///
/// # Errors
/// Returns a `ParseErrorKind::Lex` error when any lexing mode hits input it
/// cannot tokenize, or when the top-level pass stops at a `)` while input
/// remains. In the latter case the error covers the last token pushed, which
/// is that `)`.
fn lex(src: &str) -> Result<Vec<(usize, Token<'_>, usize)>> {
    let mut tokens = Vec::new();
    let mut lexer = MyLexer {
        iter: Token::lexer(src).spanned(),
        out: &mut tokens,
    };
    lex_normal(&mut lexer)?;

    // `lex_normal` returns either at end of input or right after pushing a
    // `)`. Leftover input therefore means the top level met a `)`.
    if lexer.iter.next().is_some() {
        let &(lpos, _, rpos) = tokens
            .last()
            .expect("lex_normal stops before end of input only right after pushing a `)`");
        return Err(ParseError::new(lpos, rpos, ParseErrorKind::Lex));
    }
    Ok(tokens)
}
/// Lexes `src` and parses the resulting tokens into a [`SourceFile`].
///
/// # Errors
/// Returns a [`ParseError`] when lexing fails or when the generated parser
/// rejects the token stream.
pub(crate) fn parse_source(src: &str) -> Result<SourceFile> {
    let tokens = lex(src)?;
    // The parser takes the tokens by value, so hand it copies of the triples.
    let ast = grammar::SourceFileParser::new().parse(tokens.iter().copied())?;
    Ok(ast)
}