use std::{
collections::{HashMap, HashSet},
fmt,
iter::{self, Enumerate},
mem, sync,
};
use crate::{
boo::Boo,
builder::BuildTarget,
func::Callable,
stack::{Avid, Stack},
Ast, Error, ErrorKind, Location,
};
/// Splits source text into tokens, walking the input one line at a time.
#[derive(Debug)]
pub(crate) struct Lexer<'a> {
    /// The complete source text that was handed to `Lexer::new`.
    // Only stored, never read by the lexing code itself, hence the lint allowance.
    #[allow(unused)]
    original: &'a str,
    /// Name attached to every location this lexer produces, shared via `Arc`
    /// so that each location only bumps a reference count.
    current_file_name: Option<sync::Arc<String>>,
    /// Lines that have not been started yet, paired with their zero-based index.
    src: iter::Peekable<Enumerate<std::str::Lines<'a>>>,
    /// Remaining characters of the line being lexed, paired with their byte offset.
    current_line: iter::Peekable<std::str::CharIndices<'a>>,
    /// Zero-based index of the line `current_line` was created from.
    current_line_number: usize,
    /// Set when a `-` directly precedes a digit; consumed by the next number.
    is_neg_num: bool,
}
/// A single lexical unit together with the place it was found.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Token {
    /// What kind of token this is, including any literal payload.
    pub data: TokenData,
    /// The source location reported for the token.
    pub loc: Location,
}

impl Token {
    /// Pairs the token payload `data` with its source location `loc`.
    pub fn new(data: TokenData, loc: Location) -> Self {
        Token { data, loc }
    }
}
// Payload of a `Token`: literal values (`Int`, `String`, `Bool`), a name that
// still has to be resolved by the parser (`Promise`), and the control-flow
// keywords `if` / `while` / `do` / `end`.
// NOTE(review): `enum_type!` and its `hidden` keyword are defined elsewhere in
// the crate; `Clone` is implemented by hand for this type, so the macro
// presumably does not derive it — confirm against the macro definition.
enum_type! {
hidden TokenData {
Int(isize),
String(String),
Promise(String),
Bool(bool),
If,
StartWhile,
DoWhile,
End
}
}
/// Hand-written deep copy of a token payload.
impl Clone for TokenData {
    fn clone(&self) -> Self {
        match self {
            // Keywords carry no data, so copying them is just re-creating the variant.
            Self::If => Self::If,
            Self::StartWhile => Self::StartWhile,
            Self::DoWhile => Self::DoWhile,
            Self::End => Self::End,
            // `Copy` payloads.
            Self::Int(value) => Self::Int(*value),
            Self::Bool(flag) => Self::Bool(*flag),
            // Owned text has to be duplicated.
            Self::String(text) => Self::String(text.to_owned()),
            Self::Promise(name) => Self::Promise(name.to_owned()),
        }
    }
}
/// One parsed instruction plus the source location it originated from.
#[derive(Debug, PartialEq)]
pub(crate) struct Operation {
    /// What the instruction does.
    pub(crate) typ: OpType,
    /// Location of the token this instruction was created for.
    pub(crate) loc: Location,
}

impl Operation {
    /// Builds an operation of kind `typ` attributed to `loc`.
    pub fn new(typ: OpType, loc: Location) -> Self {
        Operation { typ, loc }
    }
}
/// The kind of an `Operation` produced by the parser.
pub(crate) enum OpType {
    /// An integer literal.
    Int(isize),
    /// A string literal (escape sequences already resolved by the lexer).
    String(String),
    /// A boolean literal.
    Bool(bool),
    /// A name that matched a caller-provided function; the payload is the
    /// function's slot index in the parser's `provided_vec`.
    Provided(usize),
    /// A name that was found in the parser's set of promised names.
    Promise(String),
    /// Conditional created for an `if`, or for the `do` of a closed `while`
    /// loop. The payload is the distance, counted in tokens, from that keyword
    /// to its matching `end`.
    If(usize),
    /// Emitted for the `while` keyword.
    WhileMarker,
    /// Emitted for the `do` keyword; replaced by `If` once the loop is closed.
    DoMarker,
    /// Emitted for `end`. `None` closes an `if`; `Some(n)` closes a `while`
    /// loop, where `n` is the distance in tokens back to the `while` keyword.
    End(Option<usize>),
}
// NOTE(review): presumably a compile-time check that `Operation` is `Send`;
// `assert_send!` is defined elsewhere in the crate — confirm.
assert_send!(Operation);
/// Structural equality: two operation kinds are equal when they are the same
/// variant and carry equal payloads.
impl PartialEq for OpType {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Int(l), Self::Int(r)) => l == r,
            (Self::Bool(l), Self::Bool(r)) => l == r,
            (Self::String(l), Self::String(r)) => l == r,
            (Self::Provided(l), Self::Provided(r)) => l == r,
            (Self::Promise(l), Self::Promise(r)) => l == r,
            // `If` and `End` must be listed explicitly; without these arms they
            // fall through to `_ => false` and never equal even themselves.
            (Self::If(l), Self::If(r)) => l == r,
            (Self::End(l), Self::End(r)) => l == r,
            (Self::WhileMarker, Self::WhileMarker) | (Self::DoMarker, Self::DoMarker) => true,
            // Different variants are never equal.
            _ => false,
        }
    }
}
/// Debug output mirrors the variant names, except that the loop markers are
/// printed as the bare keywords `While` and `Do`.
impl fmt::Debug for OpType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the tuple name and its single field; the payload-free markers
        // are written directly and return early.
        let (name, payload) = match self {
            Self::WhileMarker => return f.write_str("While"),
            Self::DoMarker => return f.write_str("Do"),
            Self::Int(value) => ("Int", value as &dyn fmt::Debug),
            Self::String(text) => ("String", text as &dyn fmt::Debug),
            Self::Bool(flag) => ("Bool", flag as &dyn fmt::Debug),
            Self::Provided(slot) => ("Provided", slot as &dyn fmt::Debug),
            Self::Promise(name) => ("Promise", name as &dyn fmt::Debug),
            Self::If(offset) => ("If", offset as &dyn fmt::Debug),
            Self::End(offset) => ("End", offset as &dyn fmt::Debug),
        };
        f.debug_tuple(name).field(payload).finish()
    }
}
impl<'a> Lexer<'a> {
pub fn new(src: &'a str, src_name: Option<String>) -> Self {
Self {
original: src,
current_file_name: src_name.map(sync::Arc::new),
src: src.lines().enumerate().peekable(),
current_line: "".char_indices().peekable(),
current_line_number: 0,
is_neg_num: false,
}
}
pub(crate) fn parse_str(&mut self) -> crate::Result<Token> {
assert_eq!(self.current_line.next().map(|x| x.1), Some('"'));
let mut s = String::new();
let mut last_idx = 0;
let mut start_idx = None;
while let Some((_, col, ch)) = self.next_ch() {
if start_idx == None {
start_idx = Some(col);
}
last_idx = col;
match ch {
'"' => {
return Ok(Token::new(
TokenData::String(s),
self.current_loc_from_col(start_idx.unwrap()),
))
}
'\\' => {
if let Some((_, col, ch)) = self.next_ch() {
last_idx = col;
let to_push = match ch {
'n' => '\n',
'\n' => continue,
't' => '\t',
'\\' => '\\',
'"' => '"',
'\'' => '\'',
'r' => '\r',
_ => {
return Err(Error::new(
ErrorKind::UnknownEscape,
self.current_loc_from_col(col),
))
}
};
s.push(to_push);
} else {
return Err(Error::new(
ErrorKind::UnclosedString,
self.current_loc_from_col(last_idx),
));
}
}
_ => s.push(ch),
}
}
Err(Error::new(
ErrorKind::UnclosedString,
self.current_loc_from_col(last_idx),
))
}
fn parse_identifier(&mut self, to_prepend: Option<char>) -> crate::Result<Token> {
let col_num = self.peek_ch().map_or(0, |s| s.1);
let mut ident = self
.current_line
.by_ref()
.map(|x| x.1)
.take_while(|ch| *ch != ' ')
.collect::<String>();
if let Some(ch) = to_prepend {
ident.insert(0, ch);
}
let tok = match ident.as_str() {
"if" => TokenData::If,
"end" => TokenData::End,
"false" => TokenData::Bool(false),
"true" => TokenData::Bool(true),
"while" => TokenData::StartWhile,
"do" => TokenData::DoWhile,
_ => TokenData::Promise(ident),
};
Ok(Token::new(tok, self.current_loc_from_col(col_num)))
}
fn current_loc_from_col(&self, col: usize) -> Location {
Location {
col: col + 1,
line: self.current_line_number + 1,
file_name: self.current_file_name.as_ref().map(sync::Arc::clone),
}
}
fn parse_number(&mut self, base: u32) -> crate::Result<Token> {
let sign = if self.is_neg_num { -1 } else { 1 };
self.is_neg_num = false;
let col = self.peek_ch().map_or(0, |s| s.1);
let s = self
.current_line
.by_ref()
.map(|x| x.1)
.take_while(|x| *x != ' ')
.collect::<String>();
match isize::from_str_radix(s.as_str(), base) {
Err(_) => Err(Error::new(
ErrorKind::IncorrectNumber,
self.current_loc_from_col(col),
)),
Ok(n) => Ok(Token::new(
TokenData::Int(n * sign),
self.current_loc_from_col(col),
)),
}
}
fn peek_ch(&mut self) -> Option<(usize, usize, char)> {
match self.current_line.peek() {
Some((col, ch)) => Some((self.current_line_number, *col, *ch)),
None => {
let (num, line) = self.src.peek()?;
let mut peekable = line.char_indices().peekable();
let ch = peekable.peek()?;
Some((*num, ch.0, ch.1))
}
}
}
fn next_ch(&mut self) -> Option<(usize, usize, char)> {
match self.current_line.next() {
Some((col, ch)) => Some((self.current_line_number, col, ch)),
None => {
match self.src.next() {
Some((num, line)) => {
let prev_num = self.current_line_number;
self.current_line_number = num;
self.current_line = line.char_indices().peekable();
Some((prev_num, 0, '\n'))
}
None => None,
}
}
}
}
fn skip_comment(&mut self) {
let mut last_star = false;
while let Some((_, _, ch)) = self.next_ch() {
if ch == '*' {
last_star = true;
} else if ch == '/' && last_star {
return;
} else {
last_star = false;
}
}
}
}
impl<'a> iter::Iterator for Lexer<'a> {
type Item = crate::Result<Token>;
fn next(&mut self) -> Option<crate::Result<Token>> {
if let Some((start_col, ch)) = self.current_line.peek() {
let start_col = *start_col;
match ch {
'"' => Some(self.parse_str()),
' ' => {
self.current_line.next(); self.next()
}
'0' => {
self.current_line.next(); if let Some((col, ch)) = self.current_line.peek() {
let col = *col;
let base = match ch {
'x' => 16,
'b' => 2,
'o' => 8,
'0'..='9' => 10,
' ' | '\n' | '\t' => {
return Some(Ok(Token::new(
TokenData::Int(0),
self.current_loc_from_col(col),
)))
}
_ => {
return Some(Err(Error::new(
ErrorKind::IncorrectNumber,
self.current_loc_from_col(col),
)))
}
};
self.current_line.next();
Some(self.parse_number(base))
} else {
Some(Ok(Token::new(
TokenData::Int(0),
self.current_loc_from_col(start_col),
)))
}
}
'1'..='9' => Some(self.parse_number(10)),
'-' => {
self.current_line.next();
if let Some((_, ch)) = self.current_line.peek() {
if ch.is_numeric() {
self.is_neg_num = true;
self.next()
} else {
Some(self.parse_identifier(Some('-')))
}
} else {
Some(Ok(Token::new(
TokenData::Promise("-".to_string()),
self.current_loc_from_col(start_col),
)))
}
}
'/' => {
self.current_line.next(); if let Some((_, ch)) = self.current_line.peek() {
if *ch == '*' {
self.skip_comment();
self.next()
} else {
Some(self.parse_identifier(Some('/')))
}
} else {
Some(Ok(Token::new(
TokenData::Promise("/".to_string()),
self.current_loc_from_col(start_col),
)))
}
}
_ => Some(self.parse_identifier(None)),
}
} else {
let (line_num, line) = self.src.next()?;
self.current_line_number = line_num;
self.current_line = line.char_indices().peekable();
self.next()
}
}
}
/// Turns the token stream of a `Lexer` into `Operation`s for build target `T`.
pub(crate) struct Parser<'a, 'f, T>
where
    T: BuildTarget<'f, 'f>,
{
    /// The lexer, enumerated so every token carries its index; those indices
    /// are what jump offsets are computed from. Set to `Some` by `new`.
    toks: Option<Enumerate<Lexer<'a>>>,
    /// Names that are accepted and emitted as `OpType::Promise`.
    promised: HashSet<String>,
    /// Caller-provided functions by name: the slot index assigned to the name
    /// and the function itself until it is moved into `provided_vec`.
    provided: HashMap<String, (usize, Option<T::Function>)>,
    /// One slot per provided name; a slot is filled the first time the name
    /// is encountered in the source and stays `None` otherwise.
    provided_vec: Vec<Option<T::Function>>,
    /// Stack of control-flow constructs that have been opened but not closed.
    cf_ops: Vec<CFOp>,
    /// Operations collected so far by `all_ops`.
    operations: Vec<Operation>,
}
/// An open control-flow construct waiting for its `end`.
#[derive(Debug)]
struct CFOp {
    /// Location of the keyword that opened the construct.
    loc: Location,
    /// Index of that keyword in the token stream.
    idx: usize,
    /// Which keyword it was.
    typ: CFOpType,
}

/// Convenience conversion from a `(location, token index, kind)` triple.
impl From<(Location, usize, CFOpType)> for CFOp {
    fn from((loc, idx, typ): (Location, usize, CFOpType)) -> Self {
        CFOp { loc, idx, typ }
    }
}
/// The keyword that opened a pending control-flow construct.
#[derive(Debug)]
enum CFOpType {
    /// An `if` keyword.
    If,
    /// A `while` keyword.
    StartWhile,
    /// A `do` keyword.
    DoWhile,
}
impl<'a, 'f> Parser<'a, 'f, Avid<'f, 'f>> {
    /// Parses the whole source and assembles an `Avid` instance from it.
    ///
    /// The result starts with an empty stack at operation 0. Every operation
    /// and every provided function that was referenced is wrapped in a `Boo`.
    ///
    /// # Errors
    ///
    /// Returns the first lexing or parsing error, including unclosed
    /// `if` / `while` constructs.
    pub fn parse(mut self) -> crate::Result<Avid<'f, 'f>> {
        let parsed = self.all_ops()?;
        let ops = parsed.into_iter().map(Boo::from).collect();
        let provided = self
            .provided_vec
            .into_iter()
            .map(|slot| slot.map(Boo::<'f, Callable<'f, 'f>>::from))
            .collect();
        Ok(Avid {
            stack: Stack::new(),
            ops,
            current_op: 0,
            provided,
        })
    }
}
impl<'a, 'f> Parser<'a, 'f, Ast<'f>> {
    /// Parses the whole source into an `Ast`.
    ///
    /// The result holds the parsed operations and the per-slot provided
    /// functions collected while parsing.
    ///
    /// # Errors
    ///
    /// Returns the first lexing or parsing error, including unclosed
    /// `if` / `while` constructs.
    pub fn parse(mut self) -> crate::Result<Ast<'f>> {
        let ops = self.all_ops()?;
        let provided = self.provided_vec.into_iter().collect();
        Ok(Ast { ops, provided })
    }
}
impl<'a, 'f, T: BuildTarget<'f, 'f>> Parser<'a, 'f, T> {
pub fn all_ops(&mut self) -> crate::Result<Vec<Operation>> {
while let Some(res) = self.next() {
match res {
Ok(o) => {
self.operations.push(o);
}
Err(e) => return Err(e),
}
}
if let Some(CFOp {
loc: location,
idx: _,
typ: op,
}) = self.cf_ops.pop()
{
let err = match op {
CFOpType::If => ErrorKind::UnclosedIf,
CFOpType::StartWhile | CFOpType::DoWhile => ErrorKind::UnclosedWhile,
};
return Err(Error::new(err, location));
}
let mut buffer = Vec::new();
mem::swap(&mut buffer, &mut self.operations);
Ok(buffer)
}
pub fn new(
src: &'a str,
src_name: Option<String>,
promised: HashSet<String>,
provided: HashMap<String, T::Function>,
) -> Self {
let (keys, vals): (Vec<_>, Vec<_>) = provided.into_iter().unzip();
let provided: HashMap<_, _> = keys
.into_iter()
.zip(vals.into_iter().map(Option::Some).enumerate())
.collect();
let provided_len = provided.len();
let mut provided_vec = Vec::with_capacity(provided_len);
for _ in 0..provided_len {
provided_vec.push(None);
}
Self {
toks: Some(Lexer::new(src, src_name).enumerate()),
operations: Vec::new(),
cf_ops: Vec::new(),
promised,
provided,
provided_vec,
}
}
fn next_tok(&mut self) -> Option<(usize, crate::Result<Token>)> {
self.toks.as_mut().map(|x| x.next()).unwrap()
}
fn process_end_tok(&mut self, idx: usize, tok: &Token) -> crate::Result<OpType> {
if let Some(CFOp {
loc: location,
idx: cf_idx,
typ: op,
}) = self.cf_ops.pop()
{
match op {
CFOpType::If => {
self.operations
.insert(cf_idx, Operation::new(OpType::If(idx - cf_idx), location));
Ok(OpType::End(None))
}
CFOpType::DoWhile => match self.cf_ops.pop() {
Some(CFOp {
loc: _start_loc,
idx: start_idx,
typ: CFOpType::StartWhile,
}) => {
assert_eq!(
self.operations[start_idx].typ,
OpType::WhileMarker,
"{start_idx}"
);
assert_eq!(self.operations[cf_idx].typ, OpType::DoMarker, "{start_idx}");
self.operations[cf_idx].typ = OpType::If(idx - cf_idx);
let offset = idx - start_idx;
Ok(OpType::End(Some(offset)))
}
Some(_) | None => {
Err(Error::new(ErrorKind::DoWithoutWhile, location))
}
},
CFOpType::StartWhile => {
Err(Error::new(ErrorKind::WhileWithoutDo, location))
}
}
} else {
Err(Error::new(ErrorKind::UnpairedEnd, tok.loc.clone()))
}
}
}
impl<'a, 'f, T: BuildTarget<'f, 'f>> Iterator for Parser<'a, 'f, T> {
type Item = crate::Result<Operation>;
fn next(&mut self) -> Option<Self::Item> {
let (idx, tok) = self.next_tok()?;
if let Err(e) = tok {
return Some(Err(e));
}
let tok = tok.unwrap();
let op = match tok.data {
TokenData::Int(i) => OpType::Int(i),
TokenData::String(s) => OpType::String(s),
TokenData::Bool(b) => OpType::Bool(b),
TokenData::If => {
self.cf_ops.push((tok.loc.clone(), idx, CFOpType::If).into());
return self.next();
}
TokenData::StartWhile => {
self.cf_ops
.push((tok.loc.clone(), idx, CFOpType::StartWhile).into());
return Some(Ok(Operation::new(OpType::WhileMarker, tok.loc.clone())));
}
TokenData::DoWhile => {
self.cf_ops
.push((tok.loc.clone(), idx, CFOpType::DoWhile).into());
return Some(Ok(Operation::new(OpType::DoMarker, tok.loc.clone())));
}
TokenData::End => match self.process_end_tok(idx, &tok) {
Ok(o) => o,
Err(e) => return Some(Err(e)),
},
TokenData::Promise(p) => {
if let Some((idx, obj)) = self.provided.get_mut(&p) {
if let Some(obj) = obj.take() {
self.provided_vec[*idx] = Some(obj);
}
OpType::Provided(*idx)
} else if self.promised.contains(&p) {
OpType::Promise(p)
} else {
return Some(Err(Error::new(ErrorKind::UnknownVar { name: p }, tok.loc)));
}
}
};
Some(Ok(Operation::new(op, tok.loc)))
}
}