use rayon::prelude::*;
use unicode_segmentation::UnicodeSegmentation;
use crate::pretty_err::{DebugContext, ErrorKind};
/// A Markdown construct recognised by the lexer.
#[derive(Debug, Clone)]
pub enum MarkdownTag {
/// A heading: its level and the trimmed heading text.
Header(HeaderKind, String),
/// A horizontal separator, lexed from a run of three or more `=`.
LineSeparator,
/// A `- [ ]` / `- [x]` item: whether it is checked, and the trimmed label.
Checkbox(bool, String),
/// A `-` list item with its trimmed text.
BulletPoint(String),
/// A `>` quote with its trimmed text.
Blockquote(String),
/// Code taken from either a single-backtick span or a triple-backtick fence.
CodeBlock(String),
/// A link or image: its kind, the text between `[` `]`, and the target between `(` `)`.
Link(LinkKind, String, String),
}
/// Distinguishes the two bracketed link forms the lexer accepts.
#[derive(Debug, Clone)]
pub enum LinkKind {
/// Written with a leading `!`, i.e. `![text](target)`.
Image,
/// Written without a leading `!`, i.e. `[text](target)`.
Hyperlink,
}
/// Heading level of a Markdown header; only three levels are modelled.
#[derive(Debug, Clone)]
pub enum HeaderKind {
    /// Level 1 (`#`).
    H1,
    /// Level 2 (`##`).
    H2,
    /// Level 3 (`###`).
    H3,
}

/// Converts a heading level to its numeric depth (1, 2 or 3).
impl From<HeaderKind> for usize {
    fn from(value: HeaderKind) -> Self {
        let depth = match value {
            HeaderKind::H1 => 1,
            HeaderKind::H2 => 2,
            HeaderKind::H3 => 3,
        };
        depth
    }
}

/// Converts a numeric depth to a heading level.
///
/// Only 2 and 3 map to their own level; every other value (including 0 and
/// anything above 3) yields [`HeaderKind::H1`].
impl From<usize> for HeaderKind {
    fn from(val: usize) -> Self {
        if val == 2 {
            HeaderKind::H2
        } else if val == 3 {
            HeaderKind::H3
        } else {
            // 1 and every unsupported depth share the same result.
            HeaderKind::H1
        }
    }
}
/// A lexical unit produced by [`Lexer`].
#[derive(Debug, Clone)]
pub enum Token {
/// A `let` statement: the trimmed variable name, and the trimmed text after
/// `=` with a trailing `;` appended.
LetExpr(String, String),
/// A function-style call: which function, and its argument text.
Fn(FnKind, String),
/// The text between a pair of double quotes.
Literal(String),
/// Plain or emphasised text; `None` means no emphasis.
Text(Option<Emphasis>, String),
/// The trimmed remainder of a line after `//`.
Comment(String),
/// A Markdown construct.
Markdown(MarkdownTag),
/// Emitted by `next_line` as the sole token of a line that produced no tokens.
Newline,
}
/// The built-in functions the lexer recognises.
#[derive(Debug, Clone)]
pub enum FnKind {
    /// The `fmt(...)` call (also produced by the `$(...)` form).
    Fmt,
    /// The `eval(...)` call.
    Eval,
}

/// Yields the keyword spelling of the function: `"fmt"` or `"eval"`.
impl From<FnKind> for String {
    fn from(value: FnKind) -> Self {
        let keyword: &str = match value {
            FnKind::Fmt => "fmt",
            FnKind::Eval => "eval",
        };
        keyword.to_owned()
    }
}
/// Inline emphasis attached to a [`Token::Text`].
#[derive(Debug, Clone, PartialEq)]
pub enum Emphasis {
/// Text wrapped in `**`.
Bold,
/// Text wrapped in single `*`.
Italic,
/// Text wrapped in `~`.
Strikethrough,
}
/// Line-oriented lexer over a source split into grapheme clusters.
#[derive(Debug)]
pub struct Lexer {
/// The source as one `String` per grapheme cluster, followed by the newline
/// padding appended in `new`.
src: Vec<String>,
/// Index into `src` of the grapheme currently being examined.
pos: usize,
/// Number of `next_line` calls made so far; used to pick the line shown in errors.
line: usize,
/// Length of `src` as computed at construction.
max: usize,
/// `Some("\n")` when the last `next_token` call stopped at a newline; cleared
/// when a token is lexed.
prv: Option<String>,
/// Error context that is filled in and cloned out whenever lexing fails.
ctx: DebugContext,
}
impl Lexer {
pub fn new(src: String, ctx: DebugContext) -> Self {
let uni_vec = UnicodeSegmentation::graphemes(src.as_str(), true)
.collect::<Vec<&str>>()
.par_iter()
.map(|&x| x.to_string())
.collect::<Vec<String>>();
let mut uni_vec = uni_vec;
for _ in 0..5 {
uni_vec.push("\n".to_string());
}
let max = uni_vec.len();
Lexer {
src: uni_vec,
pos: 0,
line: 0,
max,
prv: None,
ctx,
}
}
/// Records `err` in the debug context together with the offending source
/// line and the current position.
///
/// The line shown is the one numbered `self.line` (1-based). The lookup is
/// checked: if `self.line` is 0 or lies past the last line, an empty string
/// is recorded instead of panicking inside the error path.
fn create_error(&mut self, err: ErrorKind) {
    let full_source = self.src.concat();
    let err_line = full_source
        .split('\n')
        .nth(self.line.saturating_sub(1))
        .unwrap_or_default()
        .to_string();
    self.ctx.set_source_code(err_line);
    self.ctx.set_position(self.pos);
    self.ctx.set_error(err);
}
/// Returns a copy of the grapheme at the current position.
///
/// # Errors
/// Fails with `ErrorKind::AbruptAdieu` when the position is at or past the
/// end of the source.
fn char(&mut self) -> Result<String, DebugContext> {
    if self.pos < self.max {
        return Ok(self.src[self.pos].clone());
    }
    let e = ErrorKind::AbruptAdieu(format!(
        "Reached the end of file looking for position {}",
        self.pos
    ));
    self.create_error(e);
    Err(self.ctx.clone())
}
/// Returns a copy of the grapheme immediately after the current position.
///
/// # Errors
/// Fails with `ErrorKind::AbruptAdieu` when that position is past the end of
/// the source.
fn peek(&mut self) -> Result<String, DebugContext> {
    self.peek_n(1)
}
/// Returns a copy of the grapheme `n` positions after the current one
/// without moving the lexer.
///
/// The bounds check is made against the position actually read (`pos + n`),
/// so looking ahead near the end of the source yields an error rather than
/// an out-of-bounds panic.
///
/// # Errors
/// Fails with `ErrorKind::AbruptAdieu` when `pos + n` is past the end of the
/// source.
fn peek_n(&mut self, n: usize) -> Result<String, DebugContext> {
    let target = self.pos.saturating_add(n);
    if let Some(grapheme) = self.src.get(target) {
        return Ok(grapheme.clone());
    }
    let e = ErrorKind::AbruptAdieu(format!(
        "Reached the end of file looking for position {}",
        target
    ));
    self.create_error(e);
    Err(self.ctx.clone())
}
/// Moves the lexer forward by one grapheme.
///
/// # Errors
/// Fails with `ErrorKind::AbruptAdieu` when the position is already at or
/// past the end of the source; the position is left unchanged in that case.
fn advance_char(&mut self) -> Result<(), DebugContext> {
    if self.pos < self.max {
        self.pos += 1;
        return Ok(());
    }
    let e = ErrorKind::AbruptAdieu(format!(
        "Reached the end of file looking for position {}",
        self.pos
    ));
    self.create_error(e);
    Err(self.ctx.clone())
}
fn must_consume(&mut self, c: &str) -> Result<(), DebugContext> {
let curr = self.char()?;
if curr != c {
let e = ErrorKind::BrokenExpectations(format!("Expected '{}' but found '{}'", c, curr));
self.create_error(e);
return Err(self.ctx.clone());
}
self.advance_char()?;
Ok(())
}
/// Skips forward until the current grapheme is not whitespace.
///
/// A grapheme counts as whitespace when it trims to the empty string, so
/// newlines are skipped as well.
///
/// # Errors
/// Propagates the end-of-file error if the source ends before a
/// non-whitespace grapheme is found.
fn consume_whitespace(&mut self) -> Result<(), DebugContext> {
    loop {
        let current = self.char()?;
        if !current.trim().is_empty() {
            return Ok(());
        }
        self.advance_char()?;
    }
}
/// Consumes the run of graphemes equal to `c` that starts at the current
/// position and returns it; the lexer is left on the first grapheme that
/// differs.
///
/// # Errors
/// Propagates the end-of-file error if the run reaches the end of the source.
fn consume_until_not(&mut self, c: &str) -> Result<String, DebugContext> {
    let run_start = self.pos;
    loop {
        if self.char()? != c {
            break;
        }
        self.pos += 1;
    }
    Ok(self.src[run_start..self.pos].concat())
}
/// Consumes graphemes up to, but not including, the next occurrence of `c`
/// and returns them; the lexer is left on that occurrence.
///
/// # Errors
/// Propagates the end-of-file error if `c` does not occur before the end of
/// the source.
fn consume_till(&mut self, c: &str) -> Result<String, DebugContext> {
    let span_start = self.pos;
    loop {
        if self.char()? == c {
            break;
        }
        self.pos += 1;
    }
    Ok(self.src[span_start..self.pos].concat())
}
/// Consumes the rest of the current line and returns it; the terminating
/// newline itself is not consumed.
fn consume_line(&mut self) -> Result<String, DebugContext> {
    const LINE_END: &str = "\n";
    self.consume_till(LINE_END)
}
/// Consumes the contents of an already-opened parenthesis group, honouring
/// nested pairs.
///
/// Must be called with the lexer on the first grapheme *after* the opening
/// `(`. On success the returned string holds everything from that grapheme
/// up to and including the matching `)`, and the lexer is left on that `)`
/// (it is not stepped past).
///
/// The first grapheme takes part in depth tracking like every other one, so
/// input that begins with a nested group (e.g. the `(a))` of `fmt((a))`) is
/// matched against the correct closing parenthesis.
///
/// # Errors
/// Fails with `ErrorKind::LonelyParenthesis` when the source ends before the
/// group is closed.
fn consume_nested_parenthesis(&mut self) -> Result<String, DebugContext> {
    let first = self.char()?;
    // Depth 1 accounts for the opening parenthesis the caller consumed.
    let mut depth: usize = 1;
    match first.as_str() {
        "(" => depth += 1,
        ")" => depth -= 1,
        _ => {}
    }
    let mut store = first;
    while depth > 0 {
        self.advance_char()?;
        if self.pos >= self.max {
            let e = ErrorKind::LonelyParenthesis("Unmatched parenthesis".to_string());
            self.create_error(e);
            return Err(self.ctx.clone());
        }
        let curr = self.char()?;
        match curr.as_str() {
            "(" => depth += 1,
            ")" => depth -= 1,
            _ => {}
        }
        store.push_str(&curr);
    }
    Ok(store)
}
/// Lexes the next line of the source.
///
/// Returns `Ok(None)` once the whole source has been consumed. Otherwise
/// returns the tokens found up to the next newline, or a single
/// `Token::Newline` when the line produced no tokens. The newline that ends
/// the line is consumed. The internal line counter is bumped on every call,
/// including the one that returns `Ok(None)`.
///
/// # Errors
/// Propagates any lexing error raised while tokenising the line.
pub fn next_line(&mut self) -> Result<Option<Vec<Token>>, DebugContext> {
    self.line += 1;
    if self.pos >= self.max {
        return Ok(None);
    }

    let mut line_tokens: Vec<Token> = Vec::new();
    loop {
        match self.next_token()? {
            Some(tok) => line_tokens.push(tok),
            None => break,
        }
    }
    // Step over the newline that ended the line.
    self.advance_char()?;

    let result = if line_tokens.is_empty() {
        vec![Token::Newline]
    } else {
        line_tokens
    };
    Ok(Some(result))
}
/// Lexes a single token starting at the current position.
///
/// Returns `Ok(None)` when the input is exhausted or the current grapheme is
/// a newline (the newline is left for `next_line` to consume). Otherwise the
/// first matching rule below decides the token:
///
/// * `//` → `Token::Comment` (rest of the line, trimmed)
/// * `"` → `Token::Literal` (up to the closing quote)
/// * `let` → `Token::LetExpr` (`let <name> = <value>;`)
/// * `$(` → `Token::Fn(FnKind::Fmt, ..)` (text up to the next `)`)
/// * `fmt(` / `eval(` → `Token::Fn` with the (possibly nested) argument text
/// * `#` → header, `>` → blockquote, `-` → bullet / checkbox,
///   `===` → line separator, `[`/`![` → link, backticks → code
/// * `*`, `**`, `~` → emphasised `Token::Text`
/// * anything else → a one-grapheme plain `Token::Text`
///
/// Keywords are matched on their graphemes only; no word-boundary check is
/// made, so text such as `letter` enters the `let` rule.
///
/// # Errors
/// Returns the populated `DebugContext` when a rule's grammar is violated or
/// the source ends in the middle of a token.
fn next_token(&mut self) -> Result<Option<Token>, DebugContext> {
    // End of input is not an error here: there is simply nothing left to lex.
    if self.pos >= self.max {
        return Ok(None);
    }
    let curr_tok = self.char()?;
    if curr_tok == "\n" {
        self.prv = Some("\n".to_string());
        return Ok(None);
    }
    // `prv` must be read before it is cleared, otherwise the blockquote rule
    // can never see that the previous call stopped at a newline. The very
    // first grapheme of the source also starts a line.
    let at_line_start = self.pos == 0 || self.prv.as_deref() == Some("\n");
    self.prv = None;

    if curr_tok == "/" && self.peek()? == "/" {
        self.advance_char()?;
        self.advance_char()?;
        let comment = self.consume_line()?;
        Ok(Some(Token::Comment(comment.trim().to_string())))
    } else if curr_tok == "\"" {
        self.advance_char()?;
        let literal = self.consume_till("\"")?;
        self.must_consume("\"")?;
        Ok(Some(Token::Literal(literal)))
    } else if curr_tok == "l" && self.peek()? == "e" && self.peek_n(2)? == "t" {
        self.advance_char()?;
        self.advance_char()?;
        self.advance_char()?;
        if self.char()? != " " {
            let e = ErrorKind::GrammarGoblin(
                "Let statement should be followed by a space".to_string(),
            );
            self.create_error(e);
            return Err(self.ctx.clone());
        }
        let var = self.consume_till("=")?.trim().to_string();
        if var.is_empty() {
            let e = ErrorKind::NamelessNomad("Variable name cannot be empty".to_string());
            self.create_error(e);
            return Err(self.ctx.clone());
        }
        self.must_consume("=")?;
        let mut val = self.consume_till(";")?.trim().to_string();
        self.must_consume(";")?;
        // The terminator is kept as part of the stored expression.
        val.push(';');
        Ok(Some(Token::LetExpr(var, val)))
    } else if curr_tok == "$" && self.peek()? == "(" {
        self.advance_char()?;
        self.advance_char()?;
        // Read the body first, then require the closing parenthesis.
        let fmt = self.consume_till(")")?;
        self.must_consume(")")?;
        Ok(Some(Token::Fn(FnKind::Fmt, fmt)))
    } else if curr_tok == "f" && self.peek()? == "m" && self.peek_n(2)? == "t" {
        self.advance_char()?;
        self.advance_char()?;
        self.advance_char()?;
        self.must_consume("(")?;
        let mut fmt = String::new();
        if self.char()? != ")" {
            let mut raw = self.consume_nested_parenthesis()?;
            // Drop the closing parenthesis before trimming so whitespace on
            // both sides of the arguments is removed.
            raw.pop();
            fmt = raw.trim().to_string();
        }
        self.must_consume(")")?;
        Ok(Some(Token::Fn(FnKind::Fmt, fmt)))
    } else if curr_tok == "e"
        && self.peek()? == "v"
        && self.peek_n(2)? == "a"
        && self.peek_n(3)? == "l"
    {
        self.advance_char()?;
        self.advance_char()?;
        self.advance_char()?;
        self.advance_char()?;
        self.must_consume("(")?;
        let mut eval = String::new();
        if self.char()? != ")" {
            let mut raw = self.consume_nested_parenthesis()?;
            // Same handling as `fmt`: strip the closing parenthesis, then trim.
            raw.pop();
            eval = raw.trim().to_string();
        }
        self.must_consume(")")?;
        Ok(Some(Token::Fn(FnKind::Eval, eval)))
    } else if curr_tok == "#" {
        // Every `#` is one byte, so the byte length equals the number of hashes.
        let hash_count = self.consume_until_not("#")?.len();
        let heading = self.consume_line()?;
        let header_kind: HeaderKind = hash_count.into();
        Ok(Some(Token::Markdown(MarkdownTag::Header(
            header_kind,
            heading.trim().to_string(),
        ))))
    } else if curr_tok == ">" {
        self.advance_char()?;
        if at_line_start {
            let blockquote = self.consume_line()?;
            Ok(Some(Token::Markdown(MarkdownTag::Blockquote(
                blockquote.trim().to_string(),
            ))))
        } else {
            Ok(Some(Token::Text(None, String::from(">"))))
        }
    } else if curr_tok == "-" {
        self.advance_char()?;
        self.consume_whitespace()?;
        let opens_checkbox = self.char()? == "[";
        // The bullet test looks at the grapheme *after* the current one.
        let is_next_alnum = self
            .peek()?
            .chars()
            .next()
            .map_or(false, |c| c.is_ascii_alphanumeric());
        if !opens_checkbox && is_next_alnum {
            let bullet = self.consume_line()?;
            return Ok(Some(Token::Markdown(MarkdownTag::BulletPoint(
                bullet.trim().to_string(),
            ))));
        }
        if opens_checkbox {
            self.advance_char()?;
            let is_checked = self.char()? == "x";
            self.advance_char()?;
            self.must_consume("]")?;
            self.consume_whitespace()?;
            let checkbox = self.consume_line()?;
            return Ok(Some(Token::Markdown(MarkdownTag::Checkbox(
                is_checked,
                checkbox.trim().to_string(),
            ))));
        }
        Ok(Some(Token::Text(None, curr_tok)))
    } else if curr_tok == "=" && self.peek()? == "=" && self.peek_n(2)? == "=" {
        let run_start = self.pos;
        self.consume_until_not("=")?;
        let run_len = self.pos - run_start;
        // The rest of the line is only inspected when the run is exactly
        // three `=` long; longer runs leave it for later tokens.
        if run_len == 3 && !self.consume_line()?.trim().is_empty() {
            let e = ErrorKind::GrammarGoblin(
                "Line separator should contain only '=' characters".to_string(),
            );
            self.create_error(e);
            return Err(self.ctx.clone());
        }
        Ok(Some(Token::Markdown(MarkdownTag::LineSeparator)))
    } else if (curr_tok == "!" && self.peek()? == "[") || curr_tok == "[" {
        let is_image = curr_tok == "!";
        if is_image {
            self.advance_char()?;
        }
        self.must_consume("[")?;
        let text = self.consume_till("]")?;
        self.must_consume("]")?;
        self.must_consume("(")?;
        let link = self.consume_till(")")?;
        self.must_consume(")")?;
        let kind = if is_image {
            LinkKind::Image
        } else {
            LinkKind::Hyperlink
        };
        Ok(Some(Token::Markdown(MarkdownTag::Link(kind, text, link))))
    } else if curr_tok == "`" {
        let code = if self.peek()? == "`" {
            // Fenced form: three backticks, body, three backticks.
            self.must_consume("`")?;
            self.must_consume("`")?;
            self.must_consume("`")?;
            self.consume_whitespace()?;
            let body = self.consume_till("`")?;
            self.must_consume("`")?;
            self.must_consume("`")?;
            self.must_consume("`")?;
            body
        } else {
            // Inline form: a single backtick on each side.
            self.must_consume("`")?;
            let body = self.consume_till("`")?.trim().to_string();
            self.must_consume("`")?;
            body
        };
        Ok(Some(Token::Markdown(MarkdownTag::CodeBlock(code))))
    } else if curr_tok == "*" {
        if self.peek()? == "*" {
            self.advance_char()?;
            self.advance_char()?;
            let text = self.consume_till("*")?;
            self.must_consume("*")?;
            self.must_consume("*")?;
            Ok(Some(Token::Text(Some(Emphasis::Bold), text)))
        } else {
            self.advance_char()?;
            let text = self.consume_till("*")?;
            self.must_consume("*")?;
            Ok(Some(Token::Text(Some(Emphasis::Italic), text)))
        }
    } else if curr_tok == "~" {
        self.advance_char()?;
        let text = self.consume_till("~")?;
        self.must_consume("~")?;
        Ok(Some(Token::Text(Some(Emphasis::Strikethrough), text)))
    } else {
        self.advance_char()?;
        Ok(Some(Token::Text(None, curr_tok)))
    }
}
/// Merges runs of adjacent `Token::Text` tokens that share the same emphasis
/// into a single token, leaving every other token untouched and in order.
///
/// The input vector is consumed, so tokens are moved into the result rather
/// than cloned.
pub fn compact(tokens: Vec<Token>) -> Vec<Token> {
    let mut compacted: Vec<Token> = Vec::new();
    for token in tokens {
        if let Token::Text(emphasis, text) = &token {
            if let Some(Token::Text(prev_emphasis, prev_text)) = compacted.last_mut() {
                if *prev_emphasis == *emphasis {
                    // Same emphasis as the text just emitted: extend it in place.
                    prev_text.push_str(text);
                    continue;
                }
            }
        }
        compacted.push(token);
    }
    compacted
}
}