use super::ir::*;
use crate::parser::tokens::{Token, TokenVal};
use crate::parser::{Ast, Node};
/// Compiled form of a script: the functions, constants and expressions
/// produced from an [`Ast`], plus the name tables used to resolve them.
pub struct Program {
    /// Compiled functions. Slot 0 is the `__init_global_vars__` function that
    /// receives every top-level assignment.
    pub functions: Vec<UDF>,
    /// Deduplicated constant strings, addressed by `Index::Const`.
    pub consts: Vec<String>,
    /// Expressions parsed from variable tokens, addressed by `Index::Expr`.
    pub expr: Vec<Expr>,
    /// Global variable name -> global slot.
    pub name_to_var: NameToIndex,
    /// Function name -> index into `functions`.
    pub name_to_func: NameToIndex,
    /// Constant text -> index into `consts`.
    pub name_to_const: NameToIndex,
    /// Index of the `main` function inside `functions`.
    pub entrypoint_idx: u16,
    /// Next function index handed out while collecting function names.
    func_idx: u16,
    /// Next index to assign in `expr`.
    expr_idx: u16,
    /// Next index to assign in `consts`.
    consts_idx: u16,
    /// Next global variable slot to assign.
    glob_idx: u16,
}
/// Outcome of a compilation step: `Ok(())` on success, otherwise a
/// human-readable error message.
type ProgResult = Result<(), String>;
impl Program {
/// Creates an empty program: no functions, constants or expressions, empty
/// name tables and every counter at zero.
pub fn new() -> Self {
    Self {
        functions: Vec::new(),
        consts: Vec::new(),
        expr: Vec::new(),
        name_to_var: NameToIndex::new(),
        name_to_func: NameToIndex::new(),
        name_to_const: NameToIndex::new(),
        entrypoint_idx: 0,
        func_idx: 0,
        expr_idx: 0,
        consts_idx: 0,
        glob_idx: 0,
    }
}
/// Compiles `ast` into this program.
///
/// Function slot 0 is reserved for `__init_global_vars__`, which collects the
/// top-level assignments. Names are gathered in a first pass, then every
/// function body is compiled.
///
/// # Errors
/// Returns `"missing main function"` when the script declares no `main`, or
/// the first error reported by either pass.
pub fn compile(&mut self, ast: &Ast) -> ProgResult {
    // Reserve slot 0 for the global-variable initializer.
    self.functions.push(UDF::defaults());
    self.func_idx = 1;
    self.name_to_func
        .insert("__init_global_vars__".to_string(), 0);

    self.extract_func_and_vars_names(ast)?;

    match self.name_to_func.get("main") {
        Some(main_idx) => self.entrypoint_idx = *main_idx,
        None => return Err("missing main function".to_owned()),
    }

    self.extract_functions(ast)
}
/// First pass over the top-level nodes.
///
/// * Each assignment is compiled into a `GlobalAssignement` appended to
///   function 0; a new global slot is allocated the first time a name is seen.
/// * Each function name is given the next function index. A name that is
///   already registered only triggers a warning on stdout.
/// * Any other node is ignored.
fn extract_func_and_vars_names(&mut self, ast: &Ast) -> ProgResult {
    // Top-level expressions are resolved against an empty set of locals.
    let no_locals = NameToLocalVar::new();
    for node in ast.nodes.iter() {
        match node {
            Node::Assignment(name, token) => {
                let value = match &token.val {
                    TokenVal::Const(text) => self._extract_const(text.to_owned()),
                    TokenVal::Var(text) => self._extract_expr(text, &no_locals, token)?,
                    TokenVal::SubCmd(_) => todo!("subcommand compilation"),
                    _ => unreachable!("invalid assignement"),
                };
                let var_name = name.to_owned();
                let slot = match self.name_to_var.get(&var_name) {
                    Some(existing) => *existing,
                    None => {
                        let fresh = self.glob_idx;
                        self.name_to_var.insert(var_name, fresh);
                        self.glob_idx += 1;
                        fresh
                    }
                };
                self.functions[0]
                    .instructions
                    .push(IR::GlobalAssignement(slot, value));
            }
            Node::Function(name, _, _) => {
                if self.name_to_func.contains_key(name) {
                    println!("[WARN] redefined function:'{}', will be ignored", name);
                } else {
                    self.name_to_func.insert(name.to_owned(), self.func_idx);
                    self.func_idx += 1;
                }
            }
            _ => (),
        }
    }
    Ok(())
}
/// Compiles the body of one function and appends it to `self.functions`.
///
/// Commands become command or function calls, and assignments become local or
/// global assignments. The function's arity is the highest positional argument
/// (`${1}`..`${9}`) referenced by any assignment; the constant indices of
/// `${N:-default}` values are collected in `default_args`.
///
/// # Errors
/// Propagates the first error raised while compiling a statement.
///
/// # Panics
/// Panics if `nodes` contains a nested function definition.
fn extract_function(&mut self, name: &str, nodes: &[Node]) -> ProgResult {
    let mut locals = NameToLocalVar::new();
    let mut default_args = Vec::new();
    let mut nbr_args: u8 = 0;
    let mut instructions = Vec::new();

    for node in nodes {
        let instruction = match node {
            Node::Command(tokens) => self._extract_cmd_or_func_call(tokens, &locals)?,
            Node::Assignment(target, token) => {
                // Scan for positional arguments before compiling the assignment itself.
                if let Some(highest_arg) =
                    self._extract_nbr_args_and_defaults(token, &mut default_args)
                {
                    nbr_args = nbr_args.max(highest_arg);
                }
                self._extract_assignement(target, token, &mut locals)?
            }
            Node::Function(_, _, _) => {
                unreachable!("cannot define a function inside a function")
            }
        };
        instructions.push(instruction);
    }

    // Point the name at the slot the function is about to occupy.
    self.name_to_func
        .insert(name.to_owned(), self.functions.len() as u16);
    self.functions.push(UDF {
        nbr_args,
        instructions,
        default_args,
    });
    Ok(())
}
/// Compiles `name = <token>` found inside a function body.
///
/// A name starting with an ASCII uppercase letter targets a global, which
/// must already be declared at top level. Any other name targets a local,
/// which is created on first use. For locals, a value longer than three bytes
/// that starts with `${N` (N in 1..=9) is stored as `Index::Arg(N)` instead of
/// being parsed as an expression.
///
/// # Errors
/// Fails when the global is unknown or when the expression cannot be parsed.
fn _extract_assignement(
    &mut self,
    name: &str,
    token: &Token,
    locals: &mut NameToLocalVar,
) -> IRResult {
    if !var_is_global(name) {
        let local_slot = get_or_add_local(name, locals);
        let value = match &token.val {
            TokenVal::Const(text) => self._extract_const(text.to_owned()),
            TokenVal::Var(text) => match text.as_bytes() {
                // `${N` followed by at least one more byte: positional argument.
                [b'$', b'{', digit @ b'1'..=b'9', _, ..] => Index::Arg(*digit - b'0'),
                _ => self._extract_expr(text, locals, token)?,
            },
            TokenVal::SubCmd(_) => todo!("assignment from subcomand"),
            _ => unreachable!("invalid assignment: {}", token),
        };
        return Ok(IR::LocalAssignement(local_slot, value));
    }

    let global_slot = match self.name_to_var.get(name) {
        Some(slot) => *slot,
        None => {
            return Err(format!(
                "undefined global variable {} before {}",
                name, token
            ))
        }
    };
    let value = match &token.val {
        TokenVal::Const(text) => self._extract_const(text.to_owned()),
        TokenVal::Var(text) => self._extract_expr(text, locals, token)?,
        TokenVal::SubCmd(_) => todo!("assignment from subcomand"),
        _ => unreachable!("invalid assignment: {}", token),
    };
    Ok(IR::GlobalAssignement(global_slot, value))
}
/// Compiles a command line into either a user-function call or an external
/// command call.
///
/// The first token names the callee: if it matches a registered function the
/// result is `IR::UdfCall`, otherwise the name is interned as a constant and
/// the result is `IR::CmdCall`. Remaining tokens are compiled as arguments.
///
/// # Errors
/// * `tokens` is empty.
/// * An argument cannot be compiled.
/// * The callee is a user function whose body has not been compiled yet
///   (forward reference or recursion), so its arity cannot be checked.
/// * The number of arguments is below `min_args()` or above `nbr_args`.
fn _extract_cmd_or_func_call(&mut self, tokens: &[Token], locals: &NameToLocalVar) -> IRResult {
    // Avoid indexing `tokens[0]` / computing `len() - 1` on an empty slice.
    let (first_token, arg_tokens) = match tokens.split_first() {
        Some(parts) => parts,
        None => return Err("cannot compile an empty command".to_owned()),
    };

    let callee_name = first_token.literal_value();
    let callee = if let Some(idx) = self.name_to_func.get(&callee_name) {
        Index::Udf(*idx)
    } else {
        self._extract_const(callee_name)
    };

    let mut args: Vec<Index> = Vec::with_capacity(arg_tokens.len());
    for token in arg_tokens {
        args.push(self.extract_index_from_token(token, locals)?);
    }

    match callee {
        Index::Const(idx) => Ok(IR::CmdCall(idx, args)),
        Index::Udf(idx) => {
            // Function names are registered before any body is compiled, so the
            // index may point past the functions compiled so far. Report that
            // instead of panicking on an out-of-bounds access.
            let udf = match self.functions.get(idx as usize) {
                Some(udf) => udf,
                None => {
                    return Err(format!(
                        "cannot check arguments of {}: the function is not compiled yet \
                         (it must be defined before its first call)",
                        first_token,
                    ))
                }
            };
            // Compare as usize so a very long argument list is not truncated.
            let found = args.len();
            let min_args = udf.min_args();
            if found < usize::from(min_args) {
                return Err(format!(
                    "missing arguments, expecting at least {} found {} when calling {}",
                    min_args, found, first_token,
                ));
            }
            if found > usize::from(udf.nbr_args) {
                return Err(format!(
                    "too many arguments, expecting {} found {} when calling {}",
                    udf.nbr_args, found, first_token,
                ));
            }
            Ok(IR::UdfCall(idx, args))
        }
        _ => unreachable!(""),
    }
}
/// Compiles one call argument into an `Index`.
///
/// Constants are interned and variables are parsed as expressions.
///
/// # Panics
/// Any other token kind is not implemented yet and panics with the token
/// position.
fn extract_index_from_token(&mut self, token: &Token, locals: &NameToLocalVar) -> IndexResult {
    if let TokenVal::Const(text) = &token.val {
        return Ok(self._extract_const(text.to_owned()));
    }
    if let TokenVal::Var(text) = &token.val {
        return self._extract_var_content(text, locals, token);
    }
    todo!("handle pipe: failing at {}:{}", token.row, token.col)
}
/// Parses `value` as an expression, stores it in `self.expr` and returns its
/// `Index::Expr` handle.
///
/// # Errors
/// Returns the parser's message suffixed with ` while processing <token>`.
fn _extract_expr(
    &mut self,
    value: &str,
    locals: &NameToLocalVar,
    token: &Token,
) -> IndexResult {
    let parsed = Expr::from_str(value, locals, &self.name_to_var)
        .map_err(|e| format!("{} while processing {}", e, token))?;
    let slot = self.expr_idx;
    self.expr.push(parsed);
    self.expr_idx += 1;
    Ok(Index::Expr(slot))
}
fn _extract_var_content(
&mut self,
value: &str,
locals: &NameToLocalVar,
token: &Token,
) -> IndexResult {
match Expr::from_str(value, locals, &self.name_to_var) {
Ok(var) => {
self.expr.push(var);
self.expr_idx += 1;
Ok(Index::Expr(self.expr_idx - 1))
}
Err(e) => Err(format!("{} while processing {}", e, token)),
}
}
/// Returns the index of `value` in the constant pool, adding it first when it
/// is not there yet.
fn _add_or_get_const_idx(&mut self, value: String) -> u16 {
    if let Some(known) = self.name_to_const.get(&value) {
        return *known;
    }
    let fresh = self.consts_idx;
    self.consts.push(value.clone());
    self.name_to_const.insert(value, fresh);
    self.consts_idx += 1;
    fresh
}
/// Interns `value` in the constant pool and wraps its index in `Index::Const`.
fn _extract_const(&mut self, value: String) -> Index {
    let const_idx = self._add_or_get_const_idx(value);
    Index::Const(const_idx)
}
/// Second pass: compiles every top-level function body, in declaration order.
///
/// Only the first definition of a name is compiled; later redefinitions are
/// skipped, matching the warning emitted while names were collected.
///
/// A definition is compiled when the index registered for its name equals the
/// slot the next compiled function will occupy (`self.functions.len()`).
/// Counting `Function` nodes instead would drift after a redefinition, because
/// a duplicate consumes a node position but no function index. Every function
/// declared after the duplicate would then be skipped.
///
/// # Errors
/// Propagates the first error raised while compiling a function body.
fn extract_functions(&mut self, ast: &Ast) -> ProgResult {
    for node in ast.nodes.iter() {
        if let Node::Function(name, nodes, _) = node {
            let next_slot = self.functions.len();
            let is_first_definition = self
                .name_to_func
                .get(name)
                .map_or(false, |registered| usize::from(*registered) == next_slot);
            if is_first_definition {
                self.extract_function(name, nodes)?;
            }
        }
    }
    Ok(())
}
/// Scans a variable token for positional arguments (`${1}`..`${9}`).
///
/// Returns the highest argument number referenced, or `None` when there is
/// none (constants and sub-commands always yield `None`). Each
/// `${N:-default}` found has its default text interned as a constant, and the
/// constant index is appended to `defaults` in order of appearance.
///
/// # Panics
/// Panics on token kinds other than `Const`, `Var` and `SubCmd`.
fn _extract_nbr_args_and_defaults(
    &mut self,
    token: &Token,
    defaults: &mut Vec<u16>,
) -> Option<u8> {
    match &token.val {
        TokenVal::Const(_) => None,
        TokenVal::Var(s) => {
            let val = s.as_bytes();
            // A reference needs at least `${N`, so only positions with two
            // bytes after them are scanned. Saturate so that values shorter
            // than two bytes do not underflow.
            let len = val.len().saturating_sub(2);
            let mut idx = 0;
            let mut max: u8 = 0;
            while idx < len {
                let is_positional = val[idx] == b'$'
                    && val[idx + 1] == b'{'
                    && (b'1'..=b'9').contains(&val[idx + 2]);
                if is_positional {
                    max = max.max(val[idx + 2] - b'0');
                    idx += 3;
                    if idx + 1 < len && val[idx] == b':' && val[idx + 1] == b'-' {
                        idx += 2;
                        let start = idx;
                        // Stop at the closing brace, or one byte before the end.
                        while idx < len + 1 && val[idx] != b'}' {
                            idx += 1;
                        }
                        // Lossy conversion: an unterminated default may be cut
                        // in the middle of a multi-byte character.
                        let default_value =
                            String::from_utf8_lossy(&val[start..idx]).into_owned();
                        defaults.push(self._add_or_get_const_idx(default_value));
                    }
                }
                idx += 1;
            }
            if max > 0 {
                Some(max)
            } else {
                None
            }
        }
        TokenVal::SubCmd(_) => None,
        _ => unreachable!("must not encounter any other token at this point"),
    }
}
/// Writes the textual form of the program to stdout, followed by a newline.
pub fn print(&self) {
    let rendered = self.to_string();
    println!("{}", rendered);
}
}
/// Returns the slot of the local variable `name`, registering it under the
/// next free slot (the current number of locals) when it is not known yet.
fn get_or_add_local(name: &str, locals: &mut NameToLocalVar) -> u8 {
    match locals.get(name) {
        Some(existing) => *existing,
        None => {
            let fresh = locals.len() as u8;
            locals.insert(name.to_owned(), fresh);
            fresh
        }
    }
}
/// Tells whether `varname` names a global variable, i.e. whether its first
/// byte is an ASCII uppercase letter (`A`..=`Z`).
///
/// An empty name is not global (it used to panic).
fn var_is_global(varname: &str) -> bool {
    varname
        .bytes()
        .next()
        .map_or(false, |first| first.is_ascii_uppercase())
}
// Tests for the IR produced by `Program`.
// NOTE(review): `snap!` is a project macro defined in `crate::compiler::tests`;
// presumably each invocation declares one `insta` snapshot test named after its
// first argument — confirm against the macro definition.
#[cfg(test)]
mod tests {
use crate::compiler::tests::snap;
use insta;
// Description passed to every `snap!` invocation below; it explains the
// notation used for global and local variables.
const DESC: &'static str = "intermediate representation of the program
\\0 refers to global variable at index 0
$0 refers to local variable at index 0
";
snap!(basic_script, DESC);
snap!(loc_and_glob_ir, DESC);
}