#[macro_use]
pub mod errors;
pub mod lexer;
pub mod tokens;
use std::collections::HashMap;
use tokens::{Token, TokenVal};
type NameToPosition = HashMap<String, usize>;
/// Build an [`Ast`] from a token stream.
///
/// Walks the tokens front to back; a token recognized as the start of a
/// function definition is parsed as a whole function block, anything else
/// is parsed as a single line (command or assignment). Returns the first
/// parse error as an `Err(String)`.
pub fn parse(tokens: &Vec<Token>) -> Result<Ast, String> {
    let mut ast = Ast::new(tokens, Vec::new());
    while ast.idx < tokens.len() {
        let starts_function = tokens[ast.idx].val.is_function();
        let node = if starts_function {
            ast.parse_fn()
        } else {
            ast.parse_line()
        }?;
        ast.nodes.push(node);
    }
    Ok(ast)
}
/// One parsed instruction.
pub enum Node {
    // Command: the raw tokens of a single command line.
    // Function: (function name, body nodes, row of the definition line).
    // Assignment: (variable name, the value token).
    Command(Vec<Token>), Function(String, Vec<Node>, usize), Assignment(String, Token), }
/// Parser state: the borrowed token stream plus the nodes built so far.
pub struct Ast<'a> {
    pub nodes: Vec<Node>,
    // tokens: the input stream being consumed; idx: index of the next token
    // to consume; row: row of the line currently being parsed; functions:
    // function name -> index into `nodes` where its definition was pushed.
    tokens: &'a Vec<Token>, idx: usize, row: usize, functions: NameToPosition,
}
impl<'a> Ast<'a> {
fn parse_line(&mut self) -> Result<Node, String> {
if let TokenVal::Equal = &self.tokens[self.idx + 1].val {
return self.parse_assignement();
} else {
return self.parse_command();
}
}
fn parse_command(&mut self) -> Result<Node, String> {
self.row = self.tokens[self.idx].row;
let mut res: Vec<Token> = Vec::new();
while self.idx < self.tokens.len() && self.row == self.tokens[self.idx].row {
res.push(self.tokens[self.idx].clone());
self.idx += 1;
}
Ok(Node::Command(res))
}
fn parse_assignement(&mut self) -> Result<Node, String> {
let var_name = self.tokens[self.idx].val.get_word_content();
let line = self.tokens[self.idx].row;
self.idx += 3;
if cfg!(debug_assertions) {
if line == self.tokens[self.idx].row {
todo!("maybe stop making assumptions about variables?")
}
}
Ok(Node::Assignment(
var_name,
self.tokens[self.idx - 1].clone(),
))
}
fn parse_fn(&mut self) -> Result<Node, String> {
if !self.is_valid_function_line() {
self._error("expecting function name_of_function() {")?;
}
let mut block: Vec<Node> = Vec::new();
let func_name = self.tokens[self.idx + 1].val.get_word_content();
let row = self.tokens[self.idx].row;
self.idx += 5;
let mut nbr_of_non_closed_brackets = 1;
while nbr_of_non_closed_brackets > 0 && self.idx < self.tokens.len() {
match self.tokens[self.idx].val {
TokenVal::CloseBra => {
nbr_of_non_closed_brackets -= 1;
self.idx += 1;
}
TokenVal::OpenBra => {
self.idx += 1;
nbr_of_non_closed_brackets += 1;
}
_ => block.push(self.parse_line()?),
}
}
if nbr_of_non_closed_brackets > 0 {
self._error("function block was not closed, missing } ?")?;
}
let pos = self.nodes.len();
self.functions.insert(func_name.to_owned(), pos);
Ok(Node::Function(func_name, block, row))
}
fn is_valid_function_line(&self) -> bool {
match (
&self.tokens[self.idx + 1].val,
&self.tokens[self.idx + 2].val,
&self.tokens[self.idx + 3].val,
&self.tokens[self.idx + 4].val,
) {
(TokenVal::Const(_), TokenVal::OpenPar, TokenVal::ClosePar, TokenVal::OpenBra) => true,
_ => false,
}
}
fn _error(&self, reason: &'static str) -> Result<Node, String> {
return Err(format!(
"malformed expression at {}:{}, {}",
self.tokens[self.idx].row, self.tokens[self.idx].col, reason
));
}
pub fn print(&self) {
println!("{}", self.to_string());
}
pub fn to_string(&self) -> String {
let mut buff = String::new();
for i in 0..self.nodes.len() {
self.nodes[i].to_string_buffer(&mut buff);
}
buff.pop();
buff
}
pub fn new(tokens: &'a Vec<Token>, nodes: Vec<Node>) -> Self {
Self {
tokens,
nodes,
idx: 0,
row: 1,
functions: std::collections::HashMap::new(),
}
}
}
impl Node {
    /// Render this node (and any nested nodes) as a fresh `String`.
    pub fn to_string(&self) -> String {
        let mut out = String::new();
        self.to_string_buffer(&mut out);
        out
    }

    /// Append a one-line description of this node to `buffer`, recursing
    /// into a function's body with a ` | ` prefix before each child.
    pub fn to_string_buffer(&self, buffer: &mut String) {
        match self {
            Node::Command(tokens) => {
                let first = &tokens[0];
                buffer.push_str(&format!("{}: Command({})\n", first.row, first.literal_value()));
            }
            Node::Assignment(name, val) => {
                // A leading underscore marks the variable as local.
                let line = if name.starts_with('_') {
                    format!("{}: LocalAssignement({}) \n", val.row, name)
                } else {
                    format!("{}: GlobalAssignement({})\n", val.row, name)
                };
                buffer.push_str(&line);
            }
            Node::Function(name, nodes, row) => {
                buffer.push_str(&format!("{}: Function({}) \n", row, name));
                for child in nodes {
                    buffer.push_str(" | ");
                    child.to_string_buffer(buffer);
                }
            }
        }
    }
}
#[cfg(test)]
mod parser_tests {
    use super::*;
    use insta::{self, *};

    /// Lexes a fixture script, parses it, and snapshot-compares the
    /// rendered AST against the stored insta snapshot.
    #[test]
    fn it_can_parse_a_basic_script() {
        // Fixture is embedded at compile time so the test needs no I/O.
        let text = include_bytes!("../../tests/inputs/parser/script2.sh");
        let l = lexer::Lexer::new();
        let symbols = l.tokenize(text).unwrap();
        let ast = parse(&symbols).unwrap();
        // Redirect snapshots to the shared tests/snapshots directory and
        // attach a description explaining the snapshot's line format.
        let mut settings = insta::Settings::clone_current();
        settings.set_snapshot_path("../../tests/snapshots");
        settings.set_description(
            "line number: beginning_token_index-> wnd_token_index InstructioType(name)",
        );
        settings.bind(|| assert_snapshot!(ast.to_string()));
    }
}