mssh 0.0.0

Mssh Simple SHell: a Bash interpreter/compiler. It will not support all of Bash's functionality.
#[macro_use]
pub mod errors;
pub mod lexer;
pub mod tokens;

use std::collections::HashMap;

use tokens::{Token, TokenVal};

type NameToPosition = HashMap<String, usize>;

/// Builds an [`Ast`] from the token stream produced by the lexer.
///
/// Tokens are consumed from the start of `tokens`; every parsed construct
/// (function definition, assignment or command) is appended to `Ast::nodes`
/// in the order it was parsed.
///
/// # Errors
///
/// Returns the first error message reported while parsing a function
/// definition or a line.
pub fn parse(tokens: &Vec<Token>) -> Result<Ast, String> {
    let mut ast = Ast::new(tokens, Vec::new());
    // `ast.idx` is advanced by the `parse_*` calls below; the loop ends once
    // it has moved past the last token.
    while let Some(current) = tokens.get(ast.idx) {
        // The first word of the line decides how the line is parsed.
        let parsed = if current.val.is_function() {
            ast.parse_fn()
        } else {
            ast.parse_line()
        };
        ast.nodes.push(parsed?);
    }
    Ok(ast)
}

/// A construct recognised by the parser.
pub enum Node {
    /// Function or binary invocation: the tokens collected by the parser for
    /// one command line, starting with the command word.
    Command(Vec<Token>),
    /// Function definition: its name, the nodes of its body, and the row of
    /// the token that starts the definition.
    Function(String, Vec<Node>, usize),
    /// Variable assignment: the variable name and the token holding the
    /// assigned value (per the original note, a "Var or Vars" token --
    /// TODO confirm against the lexer).
    Assignment(String, Token),
}

/// Parser state together with the nodes parsed so far.
///
/// The tree only describes the script; it is not designed to execute the
/// commands.
pub struct Ast<'a> {
    /// Parsed nodes, in the order they were produced.
    pub nodes: Vec<Node>,
    /// Token stream being parsed (output of the lexer).
    tokens: &'a Vec<Token>,
    /// Index of the token being processed.
    idx: usize,
    /// Row of the line being processed; updated when a command is parsed.
    row: usize,
    /// Maps the name of each parsed function to the length `nodes` had when
    /// the function was parsed, i.e. the position its node is expected to
    /// take once the caller pushes it.
    functions: NameToPosition,
}

impl<'a> Ast<'a> {
    // does not handle edges cases like an invalid function definition
    // in the last line
    // single.val on last line will make it crash
    fn parse_line(&mut self) -> Result<Node, String> {
        // assumes that the parser is in a consistent state
        // and the line is valid
        if let TokenVal::Equal = &self.tokens[self.idx + 1].val {
            return self.parse_assignement();
        } else {
            return self.parse_command();
        }
    }

    fn parse_command(&mut self) -> Result<Node, String> {
        // assume this line is "$(command with args | maybe pipe and &>/dev/null)"
        self.row = self.tokens[self.idx].row;
        let mut res: Vec<Token> = Vec::new();

        while self.idx < self.tokens.len() && self.row == self.tokens[self.idx].row {
            res.push(self.tokens[self.idx].clone());
            self.idx += 1;
        }

        Ok(Node::Command(res))
    }

    fn parse_assignement(&mut self) -> Result<Node, String> {
        // assumes this is this seq is var = "value with ${vars} or without"
        let var_name = self.tokens[self.idx].val.get_word_content();
        let line = self.tokens[self.idx].row;
        self.idx += 3;
        if cfg!(debug_assertions) {
            // can crash in some edge cases like missing main
            if line == self.tokens[self.idx].row {
                todo!("maybe stop making assumptions about variables?")
            }
        }

        Ok(Node::Assignment(
            var_name,
            self.tokens[self.idx - 1].clone(),
        ))
    }

    fn parse_fn(&mut self) -> Result<Node, String> {
        // assuming self.idx points to the beginning of a function.val
        // assuming this seq is function identifier (){
        if !self.is_valid_function_line() {
            self._error("expecting function name_of_function() {")?;
        }
        let mut block: Vec<Node> = Vec::new();
        let func_name = self.tokens[self.idx + 1].val.get_word_content();
        let row = self.tokens[self.idx].row;
        self.idx += 5;
        let mut nbr_of_non_closed_brackets = 1;
        while nbr_of_non_closed_brackets > 0 && self.idx < self.tokens.len() {
            match self.tokens[self.idx].val {
                TokenVal::CloseBra => {
                    nbr_of_non_closed_brackets -= 1;
                    self.idx += 1;
                }
                TokenVal::OpenBra => {
                    self.idx += 1;
                    nbr_of_non_closed_brackets += 1;
                }
                _ => block.push(self.parse_line()?),
            }
        }
        if nbr_of_non_closed_brackets > 0 {
            self._error("function block was not closed, missing } ?")?;
        }

        // the parsed function can be accessed through
        // this position inside the nodes vec as it will be inserted
        // just after returning the node
        let pos = self.nodes.len();
        self.functions.insert(func_name.to_owned(), pos);

        Ok(Node::Function(func_name, block, row))
    }

    fn is_valid_function_line(&self) -> bool {
        // assuming self.idx points to the beginning of a function token
        // and that the buffer won't overflow
        match (
            &self.tokens[self.idx + 1].val,
            &self.tokens[self.idx + 2].val,
            &self.tokens[self.idx + 3].val,
            &self.tokens[self.idx + 4].val,
        ) {
            (TokenVal::Const(_), TokenVal::OpenPar, TokenVal::ClosePar, TokenVal::OpenBra) => true,

            _ => false,
        }
    }

    fn _error(&self, reason: &'static str) -> Result<Node, String> {
        return Err(format!(
            "malformed expression at {}:{}, {}",
            self.tokens[self.idx].row, self.tokens[self.idx].col, reason
        ));
    }

    pub fn print(&self) {
        println!("{}", self.to_string());
    }
    pub fn to_string(&self) -> String {
        let mut buff = String::new();
        for i in 0..self.nodes.len() {
            self.nodes[i].to_string_buffer(&mut buff);
        }
        buff.pop();
        buff
    }

    pub fn new(tokens: &'a Vec<Token>, nodes: Vec<Node>) -> Self {
        Self {
            tokens,
            nodes,
            idx: 0,
            row: 1,
            functions: std::collections::HashMap::new(),
        }
    }
}

impl Node {
    /// Renders this node (and the body of a function node) into a new string.
    pub fn to_string(&self) -> String {
        let mut rendered = String::new();
        self.to_string_buffer(&mut rendered);
        rendered
    }

    /// Appends the textual form of this node to `buffer`.
    ///
    /// Each entry is written as `row: Kind(name)` followed by a newline.
    /// The entries of a function body come right after the function entry,
    /// each one prefixed with `" | "`.
    pub fn to_string_buffer(&self, buffer: &mut String) {
        match self {
            Node::Command(tokens) => {
                // A command is labelled with its first token.
                let first = &tokens[0];
                let entry = format!("{}: Command({})\n", first.row, first.literal_value());
                buffer.push_str(&entry);
            }
            Node::Assignment(name, val) => {
                // Names starting with an underscore are reported as local.
                let entry = if name.starts_with("_") {
                    format!("{}: LocalAssignement({}) \n", val.row, name)
                } else {
                    format!("{}: GlobalAssignement({})\n", val.row, name)
                };
                buffer.push_str(&entry);
            }
            Node::Function(name, nodes, row) => {
                let header = format!("{}: Function({}) \n", row, name);
                buffer.push_str(&header);
                for child in nodes {
                    buffer.push_str(" | ");
                    child.to_string_buffer(buffer);
                }
            }
        }
    }
}

// Snapshot tests for the parser (built only for test runs).
#[cfg(test)]
mod parser_tests {
    use super::*;
    use insta::{self, *};

    /// Lexes and parses a fixture script, then checks the rendered tree
    /// against a stored snapshot.
    #[test]
    fn it_can_parse_a_basic_script() {
        // The fixture is embedded at compile time; `include_bytes!` resolves
        // the path relative to this source file.
        let text = include_bytes!("../../tests/inputs/parser/script2.sh");
        let l = lexer::Lexer::new();
        let symbols = l.tokenize(text).unwrap();
        let ast = parse(&symbols).unwrap();

        // Start from the current insta settings and override the snapshot
        // location and description for this assertion only.
        let mut settings = insta::Settings::clone_current();
        settings.set_snapshot_path("../../tests/snapshots");
        // NOTE(review): the description mentions token indices, but the
        // rendering done by `Node::to_string_buffer` only emits the row, the
        // node kind and the name -- confirm which one is intended.
        settings.set_description(
            "line number: beginning_token_index-> wnd_token_index InstructioType(name)",
        );
        // Run the snapshot assertion with the settings above bound.
        settings.bind(|| assert_snapshot!(ast.to_string()));
    }
}