pub mod err_code {
pub const UEOF : u32 = 0;
pub const U_TERM : u32 = 1;
pub const U_TOKEN : u32 = 2;
pub const M_TOKEN : u32 = 3;
}
use err_code::*;
#[derive(Debug)]
pub struct SyntaxError {
pub code : u32,
pub token_concerned : Option<Token>,
pub additional_info : String
}
impl SyntaxError {
fn new(code : u32, t : Option<Token>, info : String) -> SyntaxError {
SyntaxError{code : code, token_concerned : t, additional_info : info}
}
}
#[derive(Debug, Clone)]
pub struct STree
{
pub value : Token,
pub children : Vec<STree>
}
impl std::fmt::Display for STree {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn aux(this : &STree, lvl : u32) -> String {
let mut ident = String::new();
for _ in 0..lvl {
ident = format!("{ident}\t");
}
let mut ret = format!("{ident}STree : value = {}\n{ident}\tchildrens :\n", this.value);
for child in &this.children {
ret = format!("{ret}{}\n", aux(child, lvl + 1));
}
ret
}
write!(f, "{}", aux(self, 0))
}
}
#[derive(Debug, Clone)]
pub enum Token {
Terminal {
id : u32,
pos : u32,
value : String
},
NTerminal {
id : u32
}
}
impl Token {
pub fn new_terminal(id : u32, value : String, pos : u32) -> Token {
return Token::Terminal{id : id, value : value, pos : pos};
}
pub fn new_nb(id : u32) -> Token {
return Token::NTerminal{id : id};
}
}
impl std::fmt::Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Token::Terminal{id, pos, value} => {
return write!(f, "T : {value} is {}, begin at {} and finish at {}", *id, *pos, *pos as usize + value.len());
}
Token::NTerminal{id} => {
return write!(f, "NT : {}", *id);
}
}
}
}
#[derive(Debug)]
pub struct Terminal {
pub id : u32,
pub pos : u32,
pub value : String
}
pub mod default_id {
pub const EOF : u32 = 0;
pub const NONE : u32 = 1;
}
pub mod default_token {
use super::*;
pub static T_EOF : Token = Token::Terminal{id : default_id::EOF, pos : 0, value : String::new()};
}
pub struct Rule {
derivations : Vec<Vec<u32>>
}
impl Rule {
pub fn new(d : Vec<Vec<u32>>) -> Rule {
Rule{derivations : d}
}
}
pub struct LL1Parser<'a>
{
rules : Vec<Rule>,
nt_begin : u32,
axiom : u32,
derivations : Vec<Vec<u32>>,
actions : Vec<&'a dyn Fn(&mut STree)>,
derivations_begin : Vec<u32>,
table : Vec<Vec<u32>>
}
impl<'a> LL1Parser<'a> {
pub fn new(rules : Vec<Rule>, actions : Vec<&'a dyn Fn(&mut STree)>, axiom : u32, nt_begin : u32) -> LL1Parser {
let nb_rules = rules.len();
let mut this = LL1Parser{
rules: rules,
nt_begin: nt_begin,
axiom : axiom,
derivations : Vec::new(),
actions : actions,
derivations_begin : Vec::with_capacity(nb_rules),
table: Vec::new()
};
let mut nb_derivations : usize = 0;
for rule in &this.rules {
this.derivations_begin.push(nb_derivations as u32);
nb_derivations += rule.derivations.len();
}
this.derivations = Vec::with_capacity(nb_derivations);
for rule in &this.rules {
for derivation in &rule.derivations {
this.derivations.push(Vec::with_capacity(derivation.len()));
let i = this.derivations.len() - 1;
for sub_d in derivation {
this.derivations[i].push(*sub_d);
}
}
}
this.table = vec![vec![u32::MAX; nt_begin as usize]; nb_rules];
this
}
pub fn make_table(&mut self) -> Result<(), String> {
fn create_rule(
this : &mut LL1Parser,
r_i : usize,
term0 : &mut Vec<Vec<u32>>,
is_in_progress : &mut Vec<bool>
) -> Result<(), String> {
if is_in_progress[r_i] {
return Err(format!("Left recursion error for the rule {}", r_i + this.nt_begin as usize));
}
for d_i in 0..this.rules[r_i].derivations.len() {
let derivation = &this.rules[r_i].derivations[d_i];
let t_act = derivation[0] as usize;
if t_act < this.nt_begin as usize {
term0[r_i].push(t_act as u32);
if this.table[r_i][t_act] != u32::MAX && t_act as u32 != default_id::NONE {
return Err(
format!(
"Left factorisation error for the {r_i}th rule (derivation {}).",
d_i + this.derivations_begin[r_i] as usize
)
);
}
this.table[r_i][t_act] = this.derivations_begin[r_i] + d_i as u32;
continue;
} is_in_progress[r_i] = true;
let t_acti = t_act - this.nt_begin as usize;
if term0[t_acti].len() == 0 {
match create_rule(this, t_acti, term0, is_in_progress) {
Err(err) => {
return Err(format!("{err}\n\tNote : From rule {}", r_i + this.nt_begin as usize));
}
Ok(()) => {}
}
}
for t_i in 0..term0[t_acti].len() {
let terminal = term0[t_acti][t_i];
term0[r_i].push(terminal);
if this.table[r_i][terminal as usize] != u32::MAX {
return Err(format!("Left factorisation error for the rule {}", r_i + this.nt_begin as usize));
}
this.table[r_i][terminal as usize] = this.derivations_begin[r_i] + d_i as u32
}
is_in_progress[r_i] = false;
}
Ok(())
}
let nb_rules = self.rules.len();
let nb_terminals = self.nt_begin;
let mut term0 = vec![Vec::with_capacity(nb_terminals as usize); nb_rules];
let mut is_in_progress = vec![false; nb_rules];
for r_i in 0..self.rules.len() {
if term0[r_i].len() > 0 {
continue;
}
match create_rule(self, r_i, &mut term0, &mut is_in_progress) {
Err(err) => {return Err(err);}
Ok(()) => {}
}
}
Ok(())
}
pub fn analyse_tokens<F>(&mut self, mut get_token : F)
-> Result<(STree, String), SyntaxError> where F : FnMut() -> Option<Token>
{
let mut warnings = String::from("");
let mut stack : Vec<u32> = Vec::new();
stack.push(self.axiom);
let mut tree_stack : Vec<(STree, u32, u32)> = Vec::new();
let mut token_act = match get_token() {
Some(Token::Terminal{id, value, pos}) => { Terminal{id : id, value : value.clone(), pos : pos} }
Some(Token::NTerminal{id}) => {
return Err(SyntaxError::new(
U_TERM,
Some(Token::NTerminal{id : id}),
format!("Tokens expected in input, got a non terminal")
)
);
}
None => {
return Err(SyntaxError::new(
UEOF,
None,
String::from("Unexpected EOF")
)
);
}
};
'main_loop : while let Some(top) = stack.pop() {
let mut i_top = tree_stack.len();
if i_top > 1 {
let mut nb_node_to_push = tree_stack[i_top - 1].1;
while nb_node_to_push == 0 && i_top > 1 {
i_top -= 1;
let last_vertex = match tree_stack.pop() {
Some((mut vertex, _, i_action)) => {
self.actions[i_action as usize](&mut vertex);
vertex
}
None => { continue; }
};
tree_stack[i_top - 1].0.children.push(last_vertex);
tree_stack[i_top - 1].1 -= 1;
nb_node_to_push = tree_stack[i_top - 1].1;
}
}
if token_act.id > self.nt_begin { return Err(SyntaxError::new(
U_TOKEN,
Some(Token::Terminal{id : token_act.id, value : token_act.value, pos : token_act.pos}),
String::from("Unknown token")
)
);
}
if top < self.nt_begin {
if top == token_act.id {
let i_top = tree_stack.len() - 1;
tree_stack[i_top].0.children.push(
STree {
children : Vec::new(),
value : Token::Terminal{id : token_act.id, value : token_act.value.clone(), pos : token_act.pos}
}
);
tree_stack[i_top].1 -= 1;
} else {
if token_act.id == default_id::EOF {
return Err(SyntaxError::new(
UEOF,
None,
String::from("Unexpected EOF")
)
);
}
return Err(SyntaxError::new(
M_TOKEN,
Some(Token::Terminal{id : token_act.id, value : token_act.value, pos : token_act.pos}),
format!("Uncorresponding token : {top} expected, found {}", token_act.id)
)
);
}
let next_t = match get_token() {
Some(Token::Terminal{id, value, pos}) => { Terminal{id : id, value : value.to_string(), pos : pos} }
Some(Token::NTerminal{id}) => {
return Err(SyntaxError::new(
U_TERM,
Some(Token::NTerminal{id : id}),
format!("Tokens expected in input, got a non terminal")
)
);
}
None => {
return Err(SyntaxError::new(
UEOF,
None,
String::from("Unexpected EOF")
)
);
}
};
drop(token_act);
token_act = next_t;
continue 'main_loop;
}
let d_i = self.table[(top - self.nt_begin) as usize][token_act.id as usize] as usize;
if d_i == u32::MAX as usize {
if self.table[(top - self.nt_begin) as usize][default_id::NONE as usize] != u32::MAX {
let i_top = tree_stack.len();
tree_stack[i_top - 1].1 -= 1;
continue 'main_loop;
} else {
if token_act.id == default_id::EOF {
println!("EXPECTED");
return Err(SyntaxError::new(
UEOF,
None,
String::from("Unexpected EOF")
)
);
}
return Err(
SyntaxError::new(
0,
None,
format!(
"There is no rule which correspond to the derivation : Rule {top} doesn't begin by token {}",
token_act.id
)
)
);
}
}
let len = self.derivations[d_i].len();
tree_stack.push(
(
STree {children : Vec::with_capacity(len), value : Token::NTerminal{id : top}},
len as u32,
d_i as u32 )
);
for t_i in 1..len + 1 {
let t_to_push_i = len - t_i;
stack.push(self.derivations[d_i][t_to_push_i] as u32);
}
}
let mut i_top = tree_stack.len();
let mut nb_node_to_push = tree_stack[i_top - 1].1; while i_top > 1 {
i_top -= 1;
if nb_node_to_push != 0 {
panic!("Internal parser error : look at line 479 : more than one node remaining to push while not in the main loop");
}
let last_vertex = match tree_stack.pop() {
Some((mut vertex, _, i_action)) => {
self.actions[i_action as usize](&mut vertex);
vertex
}
None => { continue; } };
tree_stack[i_top - 1].0.children.push(last_vertex);
tree_stack[i_top - 1].1 -= 1;
nb_node_to_push = tree_stack[i_top - 1].1;
}
if let Some(_) = get_token() {
warnings = String::from("Warning : Tokens remain unanalysed.");
}
match tree_stack.pop() {
Some((stree, _, _)) => { return Ok((stree, warnings)); }
None => { panic!("Internal parser error : No syntax tree to be return"); }
}
}
pub fn get_table(&self) -> &Vec<Vec<u32>> {
&self.table
}
}