/*!
This crate provides a binary to compile grammars into Rust code and a library implementing Earley's parsing algorithm to parse the grammars specified.

# Usage

This crate is `gramatica`. To use it you should install it in order to acquire the `gramatica_compiler` binary and also add `gramatica` to your dependencies in your project's `Cargo.toml`.

```toml
[dependencies]
gramatica = "0.1"
```

Then, if you have made a grammar file `example.rsg`, execute `gramatica_compiler example.rsg > example.rs`. Afterwards you may use the generated file `example.rs` as a Rust source file.

# Example: calculator
The classical example is to implement a calculator.

```rust,ignore
extern crate gramatica;
use std::cmp::Ordering;
use std::io::BufRead;
use gramatica::{Associativity,EarleyKind,State,Parser,ParsingTablesTrait,AmbiguityInfo};

re_terminal!(Num(f64),"[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?");
re_terminal!(Plus,"\\+");
re_terminal!(Minus,"-");
re_terminal!(Star,"\\*");
re_terminal!(Slash,"/");
re_terminal!(Caret,"\\^");
re_terminal!(LPar,"\\(");
re_terminal!(RPar,"\\)");
re_terminal!(NewLine,"\\n");
re_terminal!(_,"\\s+");//Otherwise skip spaces

nonterminal Input(())
{
	() => (),
	(Input,Line) => (),
}

nonterminal Line(())
{
	(NewLine) => (),
	(Expression(value), NewLine) =>
	{
		println!("{}",value);
	},
}

nonterminal Expression(f64)
{
	(Num(value)) => value,
	#[priority(addition)]
	#[associativity(left)]
	(Expression(l),Plus,Expression(r)) => l+r,
	#[priority(addition)]
	#[associativity(left)]
	(Expression(l),Minus,Expression(r)) => l-r,
	#[priority(multiplication)]
	#[associativity(left)]
	(Expression(l),Star,Expression(r)) => l*r,
	#[priority(multiplication)]
	#[associativity(left)]
	(Expression(l),Slash,Expression(r)) => l/r,
	#[priority(addition)]
	#[associativity(left)]
	(Minus,Expression(value)) => -value,
	#[priority(exponentiation)]
	#[associativity(right)]
	(Expression(l),Caret,Expression(r)) => l.powf(r),
	(LPar,Expression(value),RPar) => value,
}

ordering!(exponentiation,multiplication,addition);

fn main()
{
	let stdin=std::io::stdin();
	for rline in stdin.lock().lines()
	{
		let line=rline.unwrap()+"\n";
		println!("line={}",line);
		match Parser::<Token,ParsingTables>::parse(&line,None)
		{
			Err(x) => println!("error parsing: {:?}",x),
			Ok(x) => println!("parsed correctly: {:?}",x),
		};
	}
}
```

# Advanced Lexer

To define terminal tokens that cannot be expressed with regular expressions you may write a custom matching function, as in the following example.

```rust,ignore
terminal LitChar(char)
{
	fn _match(parser: &mut Parser<Token,ParsingTables>, source:&str) -> Option<(usize,char)>
	{
		let mut characters=source.chars();
		if (characters.next())==(Some('\''))
		{
			let mut c=characters.next().unwrap();
			let mut size=3;
			if c=='\\'
			{
				c=(characters.next().unwrap());
				size=4;
			}
			if characters.next().unwrap()=='\''
			{
				Some((size,c))
			}
			else
			{
				None
			}
		}
		else
		{
			None
		}
	}
}
```

Since version 0.1.0 there is also a `keyword_terminal!` macro:

```rust,ignore
keyword_terminal!(Const,"const");
```

# Parsing values as match clauses

Each rule is written as a match clause, whose ending expression is the value that the nonterminal token gets after being parsed. For example:

```rust,ignore
nonterminal Stmts(Vec<StmtKind>)
{
	(Stmt(ref stmt)) => vec![stmt.clone()],
	(Stmts(ref stmts),Stmt(ref stmt)) =>
	{
		let mut new=(stmts.clone());
		new.push(stmt.clone());
		new
	},
}
```

Reductions only execute if they are part of the final syntactic tree.

# Precedence by annotations
To avoid ambiguities you have two options: to ensure the grammar does not contain them, or to prioritize rules by introducing annotations. In the calculator example we have seen two kinds:
- `#[priority(p_name)]` to declare a rule with priority `p_name`. Later there should be an `ordering!(p_0,p_1,p_2,...)` macro-like declaration to indicate that `p_0` should reduce before `p_1`.
- `#[associativity(left/right)]` to decide how to proceed when nesting the same rule.

*/

extern crate regex;

use std::collections::HashMap;
use regex::Regex;
use std::cmp::Ordering;
use std::marker::PhantomData;
use std::fmt::Debug;

/// Associativity of a grammar rule.
///
/// The parser consults it when two competing reductions of the same text have
/// equal priority: with `Left` the candidate that appears earlier in the state
/// set is kept, with `Right` the one that appears later.
#[derive(Clone,Copy,Debug,PartialEq,Eq)]
pub enum Associativity
{
	/// Keep the candidate found earlier in the state set.
	Left,
	/// Keep the candidate found later in the state set.
	Right,
}

/// Reasons why parsing can fail.
///
/// The type is `Copy` and comparable so callers can write
/// `assert_eq!(result, Err(ParsingError::NotInGrammar))`, and it implements
/// `std::error::Error` so it can be propagated with `?` into boxed errors.
#[derive(Clone,Copy,Debug,PartialEq,Eq)]
pub enum ParsingError
{
	/// The token sequence cannot be derived from the grammar: either some
	/// state set became empty or no finished start state was found.
	NotInGrammar,
	/// More than one derivation remained after applying priorities and
	/// associativities.
	Ambiguous,
}

impl ::std::fmt::Display for ParsingError
{
	fn fmt(&self, f:&mut ::std::fmt::Formatter) -> ::std::fmt::Result
	{
		let message=match *self
		{
			ParsingError::NotInGrammar => "the input is not derivable from the grammar",
			ParsingError::Ambiguous => "the input can be parsed in more than one way",
		};
		f.write_str(message)
	}
}

impl ::std::error::Error for ParsingError
{
}

/// Records how a `State` entered its state set, so that `Parser::compute_value`
/// can walk back through the derivation.
#[derive(Clone,Debug)]
pub enum EarleyKind
{
	/// Created by the completer. Payload: index (in the current set) of the
	/// finished state that was reduced, then the set index and the state index
	/// of the state that was advanced over the reduced nonterminal.
	Complete(usize,usize,usize),
	/// Created by the scanner. Payload: set index and state index of the state
	/// that was advanced over a terminal token.
	Scan(usize,usize),
	/// Created by the predictor (also used for the initial state). Payload: a
	/// state index -- presumably the state that triggered the prediction; it is
	/// not read anywhere in this file.
	Predict(usize),
}

/// Information about an ambiguity found while completing a state.
///
/// It is considered to describe a real ambiguity only when `states` holds more
/// than one entry.
#[derive(Clone,Debug)]
pub struct AmbiguityInfo<T>
{
	// Finished states that reduce the same text to the same token and could not
	// be ordered by priority or associativity.
	states: Vec<State<T>>,
	// Index of the state set in which the ambiguity was detected.
	index: usize,
}

// Written by hand instead of derived: `#[derive(Default)]` would add a
// `T: Default` bound that is not needed to build an empty record.
impl<T> Default for AmbiguityInfo<T>
{
	/// Builds the "no ambiguity" record: no competing states, index 0.
	fn default() -> Self
	{
		let no_states=Vec::new();
		AmbiguityInfo{ states: no_states, index: 0 }
	}
}

/// An Earley item: a grammar rule together with a position (the "dot") inside
/// its right-hand side, plus the bookkeeping needed to compute its value.
#[derive(Clone,Debug)]
pub struct State<T>
{
	/// Identifier of the grammar rule; it is what `table_priority` and
	/// `table_associativity` receive.
	pub rule: usize,
	/// Token index of the left-hand side of the rule.
	pub left: usize,
	/// Token indices of the right-hand side of the rule.
	pub right: Vec<usize>,
	/// Number of right-hand side symbols already recognized (the dot position).
	pub position: usize,
	/// Index of the state set the completer goes back to when this state is
	/// finished.
	pub original_set: usize,
	/// How this state was created; used to trace back the derivation.
	pub kind: EarleyKind,
	/// Values of the right-hand side symbols; slot `position-1` is filled when
	/// the state is advanced by a scan or a completion.
	pub values: Vec<T>,
	/// Value of the whole rule. `advance` resets it to `T::default()`; it is
	/// presumably written by `ParsingTablesTrait::compute_value` once finished.
	pub computed_value: T,
	/// Ambiguity detected along the derivation of this state, if any.
	pub ambiguity_info: AmbiguityInfo<T>,
}

impl<T:Clone+Default> State<T>
{
	/// Whether every symbol of the right-hand side has been recognized.
	pub fn finished(&self)->bool
	{
		self.right.len()==self.position
	}
	/// Token index of the symbol right after the dot.
	///
	/// Panics (index out of bounds) when the state is already finished.
	pub fn next(&self)->usize
	{
		self.right[self.position]
	}
	/// Like `next`, but returns `None` when the state is finished.
	pub fn try_next(&self)->Option<usize>
	{
		if self.finished()
		{
			return None;
		}
		Some(self.next())
	}
	/// Returns a copy of this state with the dot moved one symbol forward.
	///
	/// The copy keeps rule, origin, kind, values and ambiguity information, but
	/// its `computed_value` is reset to `T::default()`.
	pub fn advance(&self)->State<T>
	{
		let moved_dot=self.position+1;
		let symbols=self.right.clone();
		State{
			rule: self.rule,
			left: self.left,
			right: symbols,
			position: moved_dot,
			original_set: self.original_set,
			kind: self.kind.clone(),
			values: self.values.clone(),
			computed_value: T::default(),
			ambiguity_info: self.ambiguity_info.clone(),
		}
	}
}

/// An Earley state set: the states that are alive at one position of the token
/// sequence.
pub struct StateSet<T>
{
	/// States in insertion order; the parser refers to them by index.
	pub states: Vec<State<T>>,
}

impl<T> StateSet<T>
{
	/// Appends `state` to the set unless the set already holds a state with the
	/// same `rule` and the same `position`.
	pub fn predict(&mut self, state: State<T>)
	{
		let already_present=self.states.iter().any(|known| known.rule==state.rule && known.position==state.position);
		if !already_present
		{
			self.states.push(state);
		}
	}
}

/// Grammar-specific tables and callbacks the generic `Parser` relies on.
///
/// `T` is the token type, which carries both terminal and nonterminal values.
pub trait ParsingTablesTrait<T>
{
	/// Token index of the start symbol used when `Parser::parse` receives no
	/// explicit initial symbol.
	fn initial()->usize;
	/// Tries to recognize one token at the parser's current source position.
	///
	/// Returns the number of bytes consumed and the token, or `None` when
	/// nothing matches (in which case `Parser::tokenize` panics). A token equal
	/// to `T::default()` is consumed but not stored.
	fn match_some(parser:&mut Parser<T,Self>) -> Option<(usize,T)> where Self:Sized;
	/// Predictor step: called with the index of the current state set, the
	/// index of the state being processed and the nonterminal `token` that
	/// follows its dot; expected to add the rules of `token` to that set.
	fn predict(parser:&mut Parser<T,Self>,index:usize,state_index:usize,token:usize) where Self:Sized;
	/// Called on a finished state once its `values` are in place; expected to
	/// compute the value of the rule.
	fn compute_value(state:&mut State<T>);
	/// Whether the token with index `token_index` is a terminal.
	fn table_terminal(token_index:usize)->bool;
	/// Relative priority of rule `a` with respect to rule `b`.
	///
	/// The parser discards a reduction by `a` on `Some(Ordering::Less)`, keeps
	/// it on `Some(Ordering::Greater)`, falls back to associativity on
	/// `Some(Ordering::Equal)` and records an ambiguity on `None`.
	fn table_priority(a:usize, b:usize) -> Option<Ordering>;
	/// Associativity declared for `rule`, if any.
	fn table_associativity(rule:usize) -> Option<Associativity>;
	/// Token index corresponding to a token value.
	// The parameter is named because anonymous trait parameters are rejected
	// from the 2018 edition on; implementors are unaffected.
	fn to_usize(token:&T) -> usize;
}

/// Lexer and Earley parser over a source string, parameterized by the token
/// type `T` and the grammar tables `Tables`.
pub struct Parser<'a,T,Tables:ParsingTablesTrait<T>>
{
	/// Earley state sets; `earley` appends one set per processed position.
	pub sets: Vec<StateSet<T>>,
	/// Text being parsed.
	pub source: &'a str,
	/// Byte offset in `source` where the lexer will look for the next token.
	pub source_index: usize,
	/// Tokens produced by `tokenize` (tokens equal to `T::default()` are skipped).
	pub tokens: Vec<T>,
	/// Byte range `(start,end)` in `source` of each entry of `tokens`.
	pub tokens_range: Vec<(usize,usize)>,
	/// Cache of compiled regular expressions, keyed by the anchored pattern
	/// `^(...)` built in `re`.
	pub regex_map: HashMap<String,Regex>,
	/// Ties the parser to its tables type without storing a value of it.
	pub phantom: PhantomData<Tables>,
}

impl<'a,T:Default+PartialEq+Clone+Debug,Tables:ParsingTablesTrait<T>> Parser<'a,T,Tables>
{
	/// Tokenizes and parses `source`, returning the value of the start symbol.
	///
	/// `initial` selects the token index of the start symbol; with `None` the
	/// one given by `Tables::initial()` is used.
	///
	/// # Errors
	/// Whatever `earley` reports: `NotInGrammar` or `Ambiguous`.
	///
	/// # Panics
	/// When the lexer cannot match any token at some position (see `tokenize`).
	pub fn parse(source:&str, initial:Option<usize>) -> Result<T,ParsingError>
	{
		let default_start=Tables::initial();
		let start_symbol=initial.unwrap_or(default_start);
		// Seed item `0 -> . start_symbol`, with one value slot for the start symbol.
		let seed=State{
			rule: 0,
			left: 0,
			right: vec![start_symbol],
			position: 0,
			original_set: 0,
			kind: EarleyKind::Predict(0),
			values: vec![T::default()],
			computed_value: T::default(),
			ambiguity_info: AmbiguityInfo::default(),
		};
		let first_set=StateSet{ states: vec![seed] };
		let mut parser=Parser::<T,Tables>{
			sets: vec![first_set],
			source: source,
			source_index: 0,
			tokens: Vec::new(),
			tokens_range: Vec::new(),
			regex_map: HashMap::new(),
			phantom: PhantomData,
		};
		parser.tokenize();
		parser.earley()
	}
	pub fn re(&mut self, regex:&'a str, source:&str) -> Option<(usize,String)>
	{
		//let s=String::from("^")+regex;
		let s=format!("^({})",regex);
		let r=
		{
			if !self.regex_map.contains_key(&s)
			{
				let x=Regex::new(&s).unwrap();
				self.regex_map.insert(s.clone(),x);
			}
			self.regex_map.get(&s).expect("regex not in map even after inserting")
		};
		match r.captures(source)
		{
			Some(cap) =>
			{
				let m=cap.get(0).unwrap();
				Some((m.end(),m.as_str().to_string()))
			},
			None => None,
		}
	}
	pub fn keyword(&mut self, key:&str, source:&str) -> Option<(usize,String)>
	{
		let mut key_chars=key.chars();
		let mut source_chars=source.chars();
		loop
		{
			match (key_chars.next(),source_chars.next())
			{
				//None => match source_chars.next()
				//{
				//	Some('a'...'z' | 'A'...'Z' | '_') => return None,
				//	_ => return Some((key.len(),key.to_string())),
				//},
				//Some(c) =>

				//(None,Some('a'...'z' | 'A'...'Z' | '_')) => return None,
				(None,Some(d)) => match d
				{
					'a'...'z' | 'A'...'Z' | '_' => return None,
					_ => return Some((key.len(),key.to_string())),
				},
				(None,None) => return Some((key.len(),key.to_string())),
				(Some(c),Some(d)) => if c!=d { return None;},
				_ => return None,
			};
		}
	}
	/// Splits the whole `source` into tokens using `Tables::match_some`.
	///
	/// Every recognized token is appended to `tokens`, and its byte range to
	/// `tokens_range`, except tokens equal to `T::default()`, which are consumed
	/// silently (this is how skipped text such as whitespace is represented).
	///
	/// # Panics
	/// When nothing matches at the current position; the message shows up to
	/// 100 bytes of the offending text.
	pub fn tokenize(&mut self)
	{
		while self.source_index<self.source.len()
		{
			match Tables::match_some(self)
			{
				None =>
				{
					// Clamp the excerpt to the end of the source and to a char
					// boundary, so that the intended message is what gets
					// reported instead of a slicing panic.
					let start=self.source_index;
					let mut end= if start+100>=self.source.len() {self.source.len()} else {start+100};
					while end>start && !self.source.is_char_boundary(end)
					{
						end-=1;
					}
					panic!("Did not match anything '{}'",&self.source[start..end]);
				},
				Some((size,token)) =>
				{
					if T::default()!=token
					{
						self.tokens.push(token);
						self.tokens_range.push((self.source_index,self.source_index+size));
					}
					self.source_index+=size;
				},
			};
		}
	}
	/// Runs Earley's algorithm over `tokens` and returns the value of the start
	/// symbol.
	///
	/// For each position every state of the current set is processed once:
	/// finished states are completed (subject to priorities and
	/// associativities), states followed by a terminal are scanned against the
	/// current token, and states followed by a nonterminal are handed to
	/// `Tables::predict`. Values are computed only afterwards, for the finished
	/// start states of the last set.
	///
	/// Diagnostics for failures are printed to standard output.
	///
	/// # Errors
	/// - `NotInGrammar` when some state set is empty (unexpected token) or no
	///   finished start state exists at the end.
	/// - `Ambiguous` when a surviving derivation carries an unresolved
	///   ambiguity, or more than one start state produced a value.
	pub fn earley(&mut self) -> Result<T,ParsingError>
	{
		let n=self.tokens.len();
		for index in 0..n+1
		{
			if self.sets[index].states.len()==0
			{
				// No state survived the previous token: report it with context.
				println!("unexpected token {:?}",self.tokens[index-1]);
				let (start,end)=self.tokens_range[index-1];
				self.print_source_context(start,end);
				print!("Tokens=...");
				let token_start = if index>20 { index-20 } else {0};
				let token_end = if index+20>self.tokens.len() {self.tokens.len()} else { index+20 };
				for i in token_start..index-1
				{
					print!("{:?},",self.tokens[i]);
				}
				print!("[{:?}],",self.tokens[index-1]);
				for i in index..token_end
				{
					print!("{:?},",self.tokens[i]);
				}
				println!("");
				return Err(ParsingError::NotInGrammar);
			}
			let last=index==n;
			let mut state_index=0;
			let mut newset=StateSet{states:vec![]};
			// The set may grow while it is being traversed (completions and
			// predictions are appended to it), hence the index-based loop.
			while state_index < self.sets[index].states.len()
			{
				let state=self.sets[index].states[state_index].clone();
				if state.finished()
				{
					//Completer
					let token=state.left;
					let x=state.original_set;
					let mut priority=true;
					let mut ambiguity=vec![state.clone()];
					for i in 0..self.sets[index].states.len()
					{
						if i==state_index
						{
							continue;
						}
						let other=&self.sets[index].states[i];
						if token!=other.left || x!=other.original_set || !other.finished()
						{
							continue;
						}
						//Two rules parse the same text to the same token, apply priority
						match Tables::table_priority(state.rule,other.rule)
						{
							None =>
							{
								ambiguity.push(other.clone());
								if i<state_index
								{
									priority=false;
									break;
								}
							},
							Some(Ordering::Less) =>
							{
								priority=false;
								break;
							},
							Some(Ordering::Equal) =>
							{
								//Same priority, use associativity
								match Tables::table_associativity(state.rule)
								{
									None =>
									{
										ambiguity.push(other.clone());
										if i<state_index
										{
											priority=false;
											break;
										}
									},
									Some(Associativity::Left) => if state_index>i
									{
										priority=false;
										break;
									},
									Some(Associativity::Right) => if state_index<i
									{
										priority=false;
										break;
									},
								}
							},
							Some(Ordering::Greater) => continue,
						};
					}
					if priority
					{
						//for each state with next=token in set x, advance it to current set
						let mut i=0;
						while i<self.sets[x].states.len()
						{
							if let Some(t)=self.sets[x].states[i].try_next()
							{
								if token==t
								{
									let mut new=self.sets[x].states[i].advance();
									new.kind=EarleyKind::Complete(state_index,x,i);
									if ambiguity.len()>1
									{
										new.ambiguity_info.states=ambiguity.clone();
										new.ambiguity_info.index=index;
									}
									self.sets[index].states.push(new);
								}
							}
							i+=1;
						}
					}
				}
				else
				{
					if Tables::table_terminal(state.next())
					{
						//Scanner
						if !last && state.next()==Tables::to_usize(&self.tokens[index])
						{
							let mut new=state.advance();
							new.kind=EarleyKind::Scan(index,state_index);
							new.values[new.position-1]=self.tokens[index].clone();
							newset.states.push(new);
						}
					}
					else
					{
						//Predictor
						let token=state.next();
						//for each token->right in grammar, add the rule to self.sets[index]
						Tables::predict(self,index,state_index,token);
					}
				}
				state_index+=1;
			}
			self.sets.push(newset);
		}
		let mut values=vec![];
		for state_index in 0..self.sets[n].states.len()
		{
			if self.sets[n].states[state_index].left==0 && self.sets[n].states[state_index].finished()
			{
				self.compute_value(n,state_index);
				if self.sets[n].states[state_index].ambiguity_info.states.len()>1
				{
					let ambiguity_index=self.sets[n].states[state_index].ambiguity_info.index;
					if ambiguity_index==0
					{
						// Detected before any token was consumed: there is no
						// token or source range to show.
						println!("Ambiguity at index=0 before any token");
					}
					else
					{
						println!("Ambiguity at index={} token={:?}",ambiguity_index,self.tokens[ambiguity_index-1]);
						let (start,end)=self.tokens_range[ambiguity_index-1];
						self.print_source_context(start,end);
					}
					return Err(ParsingError::Ambiguous);
				}
				values.push(self.sets[n].states[state_index].computed_value.clone());
			}
		}
		match values.len()
		{
			0 => Err(ParsingError::NotInGrammar),
			1 => Ok(values.swap_remove(0)),
			_ => Err(ParsingError::Ambiguous),
		}
	}
	/// Prints the source text around the byte range `start..end`: up to 100
	/// bytes before it, the range itself, and up to 100 bytes after it.
	///
	/// The outer bounds are clamped to the source and moved inwards to char
	/// boundaries, so multi-byte characters cannot make the slicing panic.
	fn print_source_context(&self, start:usize, end:usize)
	{
		let source_len=self.source.len();
		let mut showing_start= if start>100 {start-100} else {0};
		while showing_start<start && !self.source.is_char_boundary(showing_start)
		{
			showing_start+=1;
		}
		let mut showing_end= if end+100>=source_len {source_len} else {end+100};
		while showing_end>end && !self.source.is_char_boundary(showing_end)
		{
			showing_end-=1;
		}
		println!("BEFORE<{}>",&self.source[showing_start..start]);
		println!("THEN<{}>",&self.source[start..end]);
		println!("AFTER<{}>",&self.source[end..showing_end]);
	}
	/// Fills in the `values` of the state `state_index` of set `set_index` by
	/// walking back through the states it derives from and, when the state is
	/// finished, asks `Tables::compute_value` to compute its value.
	///
	/// Ambiguity information found along the way is propagated to the state.
	pub fn compute_value(&mut self, set_index:usize, state_index:usize)
	{
		let origin=self.sets[set_index].states[state_index].kind.clone();
		match origin
		{
			EarleyKind::Predict(_harbinger_state_index) =>
			{
				// A predicted state has recognized nothing yet.
			},
			EarleyKind::Scan(prev_set_index,prev_state_index) =>
			{
				// The scanned token is already stored in the last slot; the
				// earlier slots come from the state that was advanced.
				self.compute_value(prev_set_index,prev_state_index);
				let inherited_slots=self.sets[set_index].states[state_index].position-1;
				for slot in 0..inherited_slots
				{
					let value=self.sets[prev_set_index].states[prev_state_index].values[slot].clone();
					self.sets[set_index].states[state_index].values[slot]=value;
				}
				let inherited_ambiguity=self.sets[prev_set_index].states[prev_state_index].ambiguity_info.clone();
				self.sets[set_index].states[state_index].ambiguity_info=inherited_ambiguity;
			},
			EarleyKind::Complete(reduced,prev_set_index,prev_state_index) =>
			{
				self.compute_value(set_index,reduced);
				self.compute_value(prev_set_index,prev_state_index);
				let (new_values,prop_ambiguity)=
				{
					let prev=&self.sets[prev_set_index].states[prev_state_index];
					let red=&self.sets[set_index].states[reduced];
					// Start from the values of the advanced state and store the
					// value of the reduced nonterminal in the slot just passed.
					let mut new_values=prev.values.clone();
					let slot=self.sets[set_index].states[state_index].position-1;
					new_values[slot]=red.computed_value.clone();
					// An ambiguity of the advanced state takes precedence over
					// one of the reduced state.
					let prop_ambiguity= if prev.ambiguity_info.states.len()>1
					{
						Some(prev.ambiguity_info.clone())
					}
					else if red.ambiguity_info.states.len()>1
					{
						Some(red.ambiguity_info.clone())
					}
					else
					{
						None
					};
					(new_values,prop_ambiguity)
				};
				let target=&mut self.sets[set_index].states[state_index];
				target.values=new_values;
				if let Some(found)=prop_ambiguity
				{
					target.ambiguity_info=found;
				}
			},
		}
		if self.sets[set_index].states[state_index].finished()
		{
			//Compute its value
			Tables::compute_value(&mut self.sets[set_index].states[state_index]);
		}
	}
}

#[cfg(test)]
mod tests {
    // Placeholder sanity check: it only verifies that the test harness runs and
    // does not exercise any of the lexer or parser code in this file.
    #[test]
    fn it_works() {
        assert_eq!(2 + 2, 4);
    }
}