use std::{borrow::Cow, fmt::Display};
use regex::Regex;
use self::{error::SpannedLexingError, tokenizer::Tokenizer};
pub mod error;
mod tokenizer;
#[cfg(test)]
mod test;
/// A list of lexed [`Token`]s together with the source text they were produced from.
///
/// `peek` and `pop` both operate on the *end* of the internal vector, and
/// `reverse` flips the vector in place.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)]
pub struct TokenStream {
/// The source text as stored by `TokenStream::lex`: the input *after*
/// `TokenStream::replace` has rewritten its `///` lines, not the raw input.
pub original_file: String,
/// The lexed tokens; `TokenStream::lex` removes `TokenKind::Comment` tokens before storing them.
tokens: Vec<Token>,
}
impl TokenStream {
pub fn replace(src: &str) -> Cow<str> {
let re = Regex::new(r"(?m)^(?<space>\s*)///(?<content>|[^/].*)$").unwrap();
let src_new = re.replace_all(src, r##"$space#[doc = r#"$content"#]"##);
src_new
}
pub fn lex(src: &str) -> Result<Self, SpannedLexingError> {
let src = Self::replace(src);
let mut tokenizer = Tokenizer::new(&src);
let mut tokens = Vec::new();
while let Some(tok) = tokenizer.next_token()? {
tokens.push(tok);
}
let tokens = tokens
.into_iter()
.filter(|token| !matches!(token.kind, TokenKind::Comment(_)))
.collect();
Ok(Self {
tokens,
original_file: src.to_string(),
})
}
pub fn get(&self, index: usize) -> Option<&Token> {
self.tokens.get(index)
}
pub fn peek(&self) -> Option<&Token> {
self.tokens.last()
}
pub fn pop(&mut self) -> Token {
self.tokens.pop().expect("This should not be emtpy")
}
pub fn reverse(&mut self) {
self.tokens.reverse()
}
pub fn is_empty(&self) -> bool {
self.tokens.is_empty()
}
}
/// A `start`/`end` pair of positions describing where a token sits.
// NOTE(review): the unit of the positions (bytes vs. chars) and whether `end`
// is inclusive are decided by the tokenizer — confirm there.
#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
pub struct TokenSpan {
    /// Position at which the span begins.
    pub start: usize,
    /// Position at which the span ends.
    pub end: usize,
}

impl TokenSpan {
    /// Builds the span that begins where `start` begins and ends where `end` ends.
    ///
    /// No ordering check is performed on the two inputs.
    pub fn from_range(start: TokenSpan, end: TokenSpan) -> Self {
        let (first, last) = (start.start, end.end);
        Self {
            start: first,
            end: last,
        }
    }
}
/// A single lexed token: what it is (`kind`) and where it was found (`span`).
///
/// The derived `Default` yields a default `TokenSpan` and `TokenKind::DefaultTokenKind`.
#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)]
pub struct Token {
/// Location of the token.
pub span: TokenSpan,
/// The kind of token, including any payload (identifier name, literal text, …).
pub kind: TokenKind,
}
impl Token {
pub fn kind(&self) -> &TokenKind {
&self.kind
}
pub fn span(&self) -> &TokenSpan {
&self.span
}
}
/// Every kind of token the lexer can produce.
///
/// The symbols noted below are the ones the `token!` macro maps to each
/// variant. The derived `PartialOrd`/`Ord` follow the declaration order of the
/// variants, so reordering them changes comparison results.
#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum TokenKind {
/// A language keyword, see [`Keyword`].
Keyword(Keyword),
/// A keyword used in attributes, see [`AttributeKeyword`].
AttributeKeyword(AttributeKeyword),
/// An identifier, carrying its name.
Identifier(String),
/// `:`
Colon,
/// `;`
Semicolon,
/// `,`
Comma,
/// `->`
Arrow,
/// `#`
PoundSign,
/// `=`
EqualsSign,
/// A string literal, carrying its text.
StringLiteral(String),
/// Opening curly bracket.
CurlyBracketOpen,
/// Closing curly bracket.
CurlyBracketClose,
/// Opening curved bracket (presumably `(` — confirm in the tokenizer).
CurvedBracketOpen,
/// Closing curved bracket (presumably `)` — confirm in the tokenizer).
CurvedBracketClose,
/// `<`
AngledBracketOpen,
/// `>`
AngledBracketClose,
/// Opening square bracket.
SquareBracketOpen,
/// Closing square bracket.
SquareBracketClose,
/// A comment, carrying its text. `TokenStream::lex` filters these out.
Comment(String),
/// Placeholder kind returned by `TokenKind::default()`.
#[default]
DefaultTokenKind,
}
impl TokenKind {
    /// Compares two kinds while ignoring the payload of some variants.
    ///
    /// `Identifier`, `AttributeKeyword`, `StringLiteral` and `Comment` match
    /// any value of the same variant regardless of what they carry. Every
    /// other combination (including `Keyword`, whose payload *is* compared)
    /// falls back to plain equality.
    pub fn same_kind(&self, other: &TokenKind) -> bool {
        match (self, other) {
            (TokenKind::Identifier(_), TokenKind::Identifier(_))
            | (TokenKind::AttributeKeyword(_), TokenKind::AttributeKeyword(_))
            | (TokenKind::StringLiteral(_), TokenKind::StringLiteral(_))
            | (TokenKind::Comment(_), TokenKind::Comment(_)) => true,
            _ => self == other,
        }
    }
}
impl Display for TokenKind {
    /// Writes the kind as an upper-case name; variants with a payload append
    /// it in parentheses (an empty identifier prints as bare `IDENTIFIER`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Variants with a payload are written immediately; the remaining ones
        // only select a fixed name that is emitted once at the end.
        let fixed_name = match self {
            TokenKind::Keyword(word) => return write!(f, "KEYWORD({})", word),
            TokenKind::AttributeKeyword(word) => {
                return write!(f, "ATTRIBUTE_KEYWORD({})", word)
            }
            TokenKind::Identifier(ident) if ident.is_empty() => "IDENTIFIER",
            TokenKind::Identifier(ident) => return write!(f, "IDENTIFIER({})", ident),
            TokenKind::StringLiteral(text) => {
                return write!(f, r#"STRING_LITERAL("{}")"#, text)
            }
            TokenKind::Comment(text) => return write!(f, "COMMENT({})", text),
            TokenKind::Colon => "COLON",
            TokenKind::Semicolon => "SEMICOLON",
            TokenKind::Comma => "COMMA",
            TokenKind::Arrow => "ARROW",
            TokenKind::PoundSign => "POUND_SIGN",
            TokenKind::EqualsSign => "EQUALS_SIGN",
            TokenKind::CurlyBracketOpen => "CURLY_BRACKET_OPEN",
            TokenKind::CurlyBracketClose => "CURLY_BRACKET_CLOSE",
            TokenKind::CurvedBracketOpen => "CURVED_BRACKET_OPEN",
            TokenKind::CurvedBracketClose => "CURVED_BRACKET_CLOSE",
            TokenKind::AngledBracketOpen => "ANGLED_BRACKET_OPEN",
            TokenKind::AngledBracketClose => "ANGLED_BRACKET_CLOSE",
            TokenKind::SquareBracketOpen => "SQUARE_BRACKET_OPEN",
            TokenKind::SquareBracketClose => "SQUARE_BRACKET_CLOSE",
            TokenKind::DefaultTokenKind => "DEFAULT_TOKEN_KIND",
        };
        f.write_str(fixed_name)
    }
}
/// The language keywords recognised by the lexer.
///
/// The derived ordering follows declaration order: `mod < fn < struct < enum`.
// The variants are deliberately spelled exactly like the keywords they stand
// for, hence the lint exemption for the whole enum.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
#[allow(non_camel_case_types)]
pub enum Keyword {
    /// `mod`
    r#mod,
    /// `fn`
    r#fn,
    /// `struct`
    r#struct,
    /// `enum`
    r#enum,
}
/// The keywords recognised inside attributes.
///
/// The derived ordering follows declaration order: `derive < doc < error`.
// The variants are deliberately spelled exactly like the attribute names they
// stand for, hence the lint exemption for the whole enum.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
#[allow(non_camel_case_types)]
pub enum AttributeKeyword {
    /// `derive`
    derive,
    /// `doc`
    doc,
    /// `error`
    error,
}
impl Display for Keyword {
    /// Writes the keyword exactly as it is spelled in source text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let spelling = match self {
            Keyword::r#mod => "mod",
            Keyword::r#fn => "fn",
            Keyword::r#struct => "struct",
            Keyword::r#enum => "enum",
        };
        f.write_str(spelling)
    }
}
impl Display for AttributeKeyword {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AttributeKeyword::derive => f.write_str("derive"),
AttributeKeyword::doc => f.write_str("doc"),
AttributeKeyword::error => f.write_str("error"),
}
}
}
/// Shorthand for writing a `TokenKind` value, e.g. `token![->]` or `token![Semicolon]`.
///
/// Each arm accepts either a variant name or, where one is listed, the
/// punctuation/keyword itself. Kinds that carry a payload expand to a
/// placeholder value: an empty string for `Ident`/`Identifier`,
/// `StringLiteral` and `Comment`, and `AttributeKeyword::derive` for
/// `AttributeKeyword`.
///
/// All expansions go through `$crate::parser::lexing`, so this module has to
/// be reachable at that path.
// NOTE(review): the placeholder payloads are presumably meant to be compared
// with `TokenKind::same_kind`, which ignores those payloads — confirm at the call sites.
#[macro_export]
macro_rules! token {
// Punctuation: variant name first, then the symbol it stands for.
[Semicolon] => { $crate::parser::lexing::TokenKind::Semicolon };
[;] => { $crate::parser::lexing::TokenKind::Semicolon };
[Colon] => { $crate::parser::lexing::TokenKind::Colon };
[:] => { $crate::parser::lexing::TokenKind::Colon };
[Comma] => { $crate::parser::lexing::TokenKind::Comma };
[,] => { $crate::parser::lexing::TokenKind::Comma };
[Arrow] => { $crate::parser::lexing::TokenKind::Arrow };
[->] => { $crate::parser::lexing::TokenKind::Arrow };
[PoundSign] => { $crate::parser::lexing::TokenKind::PoundSign };
[#] => { $crate::parser::lexing::TokenKind::PoundSign };
[EqualsSign] => { $crate::parser::lexing::TokenKind::EqualsSign };
[=] => { $crate::parser::lexing::TokenKind::EqualsSign };
[AngledBracketOpen] => { $crate::parser::lexing::TokenKind::AngledBracketOpen };
[<] => { $crate::parser::lexing::TokenKind::AngledBracketOpen };
[AngledBracketClose] => { $crate::parser::lexing::TokenKind::AngledBracketClose };
[>] => { $crate::parser::lexing::TokenKind::AngledBracketClose };
// Remaining brackets are only available by variant name.
[CurlyBracketOpen] => { $crate::parser::lexing::TokenKind::CurlyBracketOpen};
[CurlyBracketClose] => { $crate::parser::lexing::TokenKind::CurlyBracketClose};
[CurvedBracketOpen] => { $crate::parser::lexing::TokenKind::CurvedBracketOpen};
[CurvedBracketClose] => { $crate::parser::lexing::TokenKind::CurvedBracketClose};
[SquareBracketOpen] => { $crate::parser::lexing::TokenKind::SquareBracketOpen};
[SquareBracketClose] => { $crate::parser::lexing::TokenKind::SquareBracketClose};
// Keywords, written as the keyword itself.
[mod] => { $crate::parser::lexing::TokenKind::Keyword($crate::parser::lexing::Keyword::r#mod) };
[fn] => { $crate::parser::lexing::TokenKind::Keyword($crate::parser::lexing::Keyword::r#fn) };
[struct] => { $crate::parser::lexing::TokenKind::Keyword($crate::parser::lexing::Keyword::r#struct) };
[enum] => { $crate::parser::lexing::TokenKind::Keyword($crate::parser::lexing::Keyword::r#enum) };
// Payload-carrying kinds expand to a placeholder payload (`derive` / empty string).
[AttributeKeyword] => { $crate::parser::lexing::TokenKind::AttributeKeyword($crate::parser::lexing::AttributeKeyword::derive) };
[Ident] => { $crate::parser::lexing::TokenKind::Identifier("".to_owned()) };
[Identifier] => { $crate::parser::lexing::TokenKind::Identifier("".to_owned()) };
[StringLiteral] => { $crate::parser::lexing::TokenKind::StringLiteral("".to_owned()) };
[Comment] => { $crate::parser::lexing::TokenKind::Comment("".to_owned()) };
}
/// Checks that the `token!` macro expands to the expected `TokenKind` values.
#[cfg(test)]
mod tests {
    use super::TokenKind;
    use crate::token;

    /// Generates one `#[test]` named `$name` asserting that `token![$from]`
    /// equals `$to`.
    // The macro previously carried a second, identical arm; it could never be
    // selected because the first arm always matches first, so it was removed.
    macro_rules! token_macro_test {
        ($name:ident, $from:tt, => $to:expr) => {
            #[test]
            fn $name() {
                let got: TokenKind = token![$from];
                let should_be = $to;
                assert_eq!(got, should_be);
            }
        };
    }

    token_macro_test!(tok_expands_to_arrow, ->, => TokenKind::Arrow);
    token_macro_test!(tok_expands_to_semicolon, Semicolon, => TokenKind::Semicolon);
    token_macro_test!(tok_expands_to_mod, mod, => TokenKind::Keyword(crate::parser::lexing::Keyword::r#mod));
    token_macro_test!(tok_expands_to_fn, fn, => TokenKind::Keyword(crate::parser::lexing::Keyword::r#fn));
    token_macro_test!(tok_expands_to_struct, struct, => TokenKind::Keyword(crate::parser::lexing::Keyword::r#struct));
    token_macro_test!(tok_expands_to_enum, enum, => TokenKind::Keyword(crate::parser::lexing::Keyword::r#enum));
}