#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use std::str::Chars;
use regex::Regex;
use std::collections::{HashSet,BTreeMap,BTreeSet};
use crate::RawToken::*;
use crate::{LBox,LRc,lbup};
use std::any::Any;
use bumpalo::Bump;
use std::io;
/// A lexical token for the legacy parser interface: a grammar-symbol
/// name paired with a semantic value of type AT.
#[cfg(feature = "legacy-parser")]
pub struct Lextoken<AT:Default> {
// "sym" names the terminal grammar symbol; "value" is its semantic value
pub sym: String, pub value: AT, }
#[cfg(feature = "legacy-parser")]
impl<AT:Default> Lextoken<AT>
{
  /// Constructs a token from its grammar-symbol name and semantic value.
  pub fn new(name:String, val:AT) -> Lextoken<AT>
  {
    Lextoken { sym: name, value: val }
  }
}
/// Legacy lexical-analyzer interface: supplies [Lextoken]s on demand.
#[cfg(feature = "legacy-parser")]
pub trait Lexer<AT:Default>
{
/// returns the next token, or None at end of input
fn nextsym(&mut self) -> Option<Lextoken<AT>>;
/// current line number (default 0) and column (default 0)
fn linenum(&self) -> usize { 0 } fn column(&self) -> usize { 0 }
/// text of the line currently being scanned (default "")
fn current_line(&self) -> &str { "" }
}
/// Simple built-in legacy lexer that returns every non-whitespace
/// character as a separate token.
#[cfg(feature = "legacy-parser")]
pub struct charlexer<'t>
{
chars: Chars<'t>,   // remaining (unconsumed) characters of the input
index: usize,       // number of characters consumed so far
len: usize,         // total byte length of the input
line:usize,         // current line number (1-based)
keep_ws: bool, pub modify: fn(char)->String,  // keep whitespace as tokens?; transforms each char into token text
}
#[cfg(feature = "legacy-parser")]
impl<'t> charlexer<'t>
{
  /// Creates a charlexer that skips whitespace; equivalent to
  /// `charlexer::make(input,false)`.  (Previously duplicated `make`'s
  /// body verbatim; now delegates.)
  pub fn new<'u:'t>(input:&'u str) -> charlexer<'u>
  { charlexer::make(input,false) }
  /// Creates a charlexer over `input`; `kws` selects whether whitespace
  /// characters are returned as tokens.
  pub fn make<'u:'t>(input:&'u str, kws:bool) -> charlexer<'u>
  { charlexer {chars:input.chars(), index:0, len:input.len(), line:1, keep_ws:kws, modify:|x|{x.to_string()}} }
}
#[cfg(feature = "legacy-parser")]
impl<'t, AT:Default> Lexer<AT> for charlexer<'t>
{
/// Returns the next character as a one-character token (text produced
/// by the `modify` function), skipping whitespace unless keep_ws is set.
fn nextsym(&mut self) -> Option<Lextoken<AT>>
{
let mut res = None;
let mut stop = false;
while !stop && self.index<self.len
{
let nc = self.chars.next();
res=match nc { None => {stop=true; None},
Some(c) => {
self.index+=1;
if c=='\n' {self.line+=1;}
// whitespace is skipped (res stays None) unless keep_ws was requested
if c.is_whitespace() && !self.keep_ws {None}
else {
stop=true;
let mc = (self.modify)(c);
Some(Lextoken::new(mc,AT::default()))}
},
} } if (self.index<=self.len) {res} else {None}
} fn linenum(&self) -> usize { self.line }
/// NOTE(review): returns the count of consumed characters, not a
/// per-line column
fn column(&self) -> usize { self.index }
fn current_line(&self) -> &str
{
// the unconsumed remainder of the input, not strictly one line
self.chars.as_str()
}
}
/// A terminal token: grammar-symbol name, semantic value, and source
/// position.  This is the token type consumed through the [Tokenizer]
/// interface.
pub struct TerminalToken<'t,AT:Default>
{
pub sym: &'t str,   // name of the terminal grammar symbol
pub value: AT,      // semantic value attached to the token
pub line:usize,     // line on which the token begins
pub column:usize,   // column on which the token begins
}
impl<'t,AT:Default> TerminalToken<'t,AT>
{
/// Creates a token from symbol name, value, line and column.
pub fn new(s:&'t str, v:AT, ln:usize, cl:usize) -> TerminalToken<'t,AT>
{ TerminalToken{sym:s, value:v, line:ln, column:cl} }
/// Creates a token with the given symbol and value but this token's
/// source position.
pub fn transfer(&self, s:&'t str, v:AT) -> TerminalToken<'t,AT>
{ TerminalToken{sym:s, value:v, line:self.line, column:self.column} }
/// Creates a token at the position carried by a raw-token triple; the
/// RawToken itself is discarded in favor of the supplied symbol/value.
pub fn from_raw(rt:(RawToken<'t>,usize,usize),s:&'t str,v:AT) -> TerminalToken<'t,AT>
{ TerminalToken{sym:s, value:v, line:rt.1, column:rt.2} }
/// wraps e in an LBox carrying this token's line and column
pub fn lb<T>(&self,e:T) -> LBox<T> { LBox::new(e,self.line,self.column ) }
/// like lb, but upcast to LBox<dyn Any>
pub fn lba<T:'static>(&self,e:T) -> LBox<dyn Any> { LBox::upcast(LBox::new(e,self.line,self.column )) }
/// wraps e in an LRc carrying this token's line and column
pub fn lrc<T>(&self,e:T) -> LRc<T> { LRc::new(e,self.line,self.column ) }
/// like lrc, but upcast to LRc<dyn Any>
pub fn lrca<T:'static>(&self,e:T) -> LRc<dyn Any> { LRc::upcast(LRc::new(e,self.line,self.column )) }
}
impl<'t,AT:Default+'static> TerminalToken<'t,AT>
{
/// Like [TerminalToken::from_raw], but wraps the value in an
/// `LBox<dyn Any>` carrying the raw token's line and column.
pub fn raw_to_lba(rt:(RawToken<'t>,usize,usize),s:&'t str,v:AT) -> TerminalToken<'t,LBox<dyn Any>> {
TerminalToken {
sym:s,
value: lbup!(LBox::new(v,rt.1,rt.2)),
line:rt.1, column:rt.2,
}
}
}
/// Interface expected of lexical analyzers that feed the generated
/// parsers: an on-demand supplier of [TerminalToken]s plus position
/// reporting.  Most methods have no-op defaults.
pub trait Tokenizer<'t,AT:Default>
{
/// returns the next token, or None at end of input
fn nextsym(&mut self) -> Option<TerminalToken<'t,AT>>;
/// current line number (default 0) and column (default 0)
fn linenum(&self) -> usize { 0 } fn column(&self) -> usize { 0 }
/// current byte position in the input (default 0)
fn position(&self) -> usize { 0 }
/// registers a symbol to be matched before other token categories
fn add_priority_symbol(&mut self, sym:&'static str) {}
/// text of the line currently being scanned (default "")
fn current_line(&self) -> &str { "" }
/// returns line i of the source, if available
fn get_line(&self,i:usize) -> Option<&str> {None}
/// returns the source text between byte offsets start and end
fn get_slice(&self,start:usize,end:usize) -> &str {""}
/// returns a label for the source (e.g. a path)
fn source(&self) -> &str {""}
/// hook for transforming wildcard tokens; identity by default
fn transform_wildcard(&self,t:TerminalToken<'t,AT>) -> TerminalToken<'t,AT>
{t}
/// like nextsym, but returns an "EOF" token instead of None at end of
/// input
fn next_tt(&mut self) -> TerminalToken<'t,AT>
{
match self.nextsym() {
Some(tok) => tok,
None => TerminalToken::new("EOF",AT::default(),self.linenum(),self.column()),
} }}
/// The categories of raw tokens produced by [StrTokenizer].
#[derive(Debug)]
pub enum RawToken<'t>
{
  /// integer literal, parsed as i64
  Num(i64),
  /// floating-point literal, parsed as f64
  Float(f64),
  /// numeric literal too large for i64/f64, kept as source text
  BigNumber(&'t str),
  /// single-quoted character literal
  Char(char),
  /// string literal, including the enclosing double quotes
  Strlit(&'t str),
  /// alphanumeric identifier (starts with letter or underscore)
  Alphanum(&'t str),
  /// non-alphanumeric symbol such as "==" or "+"
  Symbol(&'t str),
  /// byte literal
  Byte(u8),
  /// byte-slice literal
  Bytes(&'t [u8]),
  /// newline, only emitted when keep_newline is set
  Newline,
  /// width of a whitespace run, only emitted when keep_whitespace is set
  Whitespace(usize),
  /// comment text, only emitted when keep_comment is set
  Verbatim(&'t str),
  /// token matched by a user-defined regex: (category name, matched text)
  Custom(&'static str, &'t str),
  /// text consumed in skip_to mode
  Skipto(&'t str),
  /// text consumed in skip_match mode
  Skipmatched(&'t str),
  /// lexical error
  LexError,
}
impl<'t> RawToken<'t> {
  /// Returns a static string naming this token's category, e.g.
  /// "RawToken::Num"; for Custom tokens the user-supplied category name
  /// is returned directly.
  pub fn to_staticstr(&self) -> &'static str {
    match self {
      // Self:: paths keep this match independent of the glob import
      Self::Num(_) => "RawToken::Num",
      Self::Float(_) => "RawToken::Float",
      Self::BigNumber(_) => "RawToken::BigNumber",
      Self::Char(_) => "RawToken::Char",
      Self::Strlit(_) => "RawToken::Strlit",
      Self::Alphanum(_) => "RawToken::Alphanum",
      Self::Symbol(_) => "RawToken::Symbol",
      Self::Byte(_) => "RawToken::Byte",
      Self::Bytes(_) => "RawToken::Bytes",
      Self::Newline => "RawToken::Newline",
      Self::Whitespace(_) => "RawToken::Whitespace",
      Self::Verbatim(_) => "RawToken::Verbatim",
      Self::Custom(s,_) => s,
      Self::Skipto(_) => "RawToken::Skipto",
      Self::Skipmatched(_) => "RawToken::Skipmatched",
      // was "RawToken:LexError" (single colon) — inconsistent with every
      // other arm
      Self::LexError => "RawToken::LexError",
    }
  }
}
/// General-purpose, zero-copy lexical scanner over a &str: produces
/// [RawToken]s through [StrTokenizer::next_token] or the Iterator impl.
pub struct StrTokenizer<'t>
{
decuint:Regex,      // ^\d+ : decimal integers
hexnum:Regex,       // ^0x[hexdigits]+ : hexadecimal integers
floatp:Regex,       // floating-point literals
alphan:Regex,       // identifiers: [_a-zA-Z][_\da-zA-Z]*
nonalph:Regex,      // maximal runs of non-alphanumeric symbol characters
custom_defined:Vec<(&'static str,Regex)>,  // user-added (name, regex) categories
doubles:HashSet<&'t str>,  // registered 2-character symbols
singles:HashSet<char>,     // registered 1-character symbols
triples:HashSet<&'t str>,  // registered 3-character symbols
input: &'t str,            // the text being scanned
position: usize,           // byte offset of the scan front
prev_position: usize,      // byte offset where the last token started
pub keep_whitespace:bool,  // emit Whitespace tokens when set
pub keep_newline:bool,     // emit Newline tokens when set
line:usize,                // current line (1-based)
line_comment:&'t str,      // line-comment opener, default "//"
ml_comment_start:&'t str,  // multi-line comment opener, default "/*"
ml_comment_end:&'t str,    // multi-line comment closer, default "*/"
pub keep_comment:bool,     // emit Verbatim tokens for comments when set
line_start:usize, src:&'t str, pub line_positions:Vec<usize>, skipbegin: &'static str,  // start of current line; source label; byte offset of each line start; skip_match opener
skipend: &'static str,     // skip_to / skip_match closer
skipcount : i32,           // bracket-nesting offset for skip_match
pub specialeof: &'static str,  // sentinel that force-ends a skip region
pub tab_spaces:usize,      // assumed column width of a tab (default 6)
linetabs:usize,            // tabs seen so far on the current line
pub allow_newline_in_string: bool,  // permit raw newlines inside string literals
pub priority_symbols:BTreeMap<&'static str,u32>,  // symbols matched first, with remaining-use counts
}
impl<'t> StrTokenizer<'t>
{
/// Creates a tokenizer with the default configuration: "//" line
/// comments, "/*" "*/" multi-line comments, bracket characters as
/// single-character symbols, tab width 6, and empty input (call
/// set_input before scanning).
pub fn new() -> StrTokenizer<'t>
{
   // bracket characters are recognized as single-char symbols by default
   let mut singles = HashSet::with_capacity(16);
   for c in ['(',')','[',']','{','}'] { singles.insert(c); }
   StrTokenizer {
      decuint: Regex::new(r"^\d+").unwrap(),
      hexnum: Regex::new(r"^0x[\dABCDEFabcdef]+").unwrap(),
      floatp: Regex::new(r"^\d*\x2E\d+([eE][+-]?\d+)?").unwrap(),
      alphan: Regex::new(r"^[_a-zA-Z][_\da-zA-Z]*").unwrap(),
      nonalph: Regex::new(r"^[!@#$%\^&*\?\-\+\*/\.,<>=~`';:\|\\]+").unwrap(),
      custom_defined: Vec::new(),
      doubles: HashSet::with_capacity(16),
      singles,
      triples: HashSet::with_capacity(16),
      input: "",
      position: 0,
      prev_position: 0,
      keep_whitespace: false,
      keep_newline: false,
      line: 1,
      line_comment: "//",
      ml_comment_start: "/*",
      ml_comment_end: "*/",
      keep_comment: false,
      line_start: 0,
      src: "",
      line_positions: vec![0,0],
      skipbegin: "",
      skipend: "",
      skipcount: 0,
      specialeof: "$_RREOF_$",
      tab_spaces: 6,
      linetabs: 0,
      allow_newline_in_string: false,
      priority_symbols: BTreeMap::new(),
   }
}
/// Applies closure `f` to this tokenizer and returns its result.
pub fn map<G,FM:FnOnce(&mut StrTokenizer<'t>) -> G>(&mut self,f:FM) -> G {
f(self)
}
/// Text of the most recently returned token (the slice between the
/// previous and current scan positions).
pub fn current_text(&self) -> &'t str {
&self.input[self.previous_position()..self.current_position()]
}
/// Registers a two-character symbol such as "=="; ignored unless the
/// string is exactly 2 bytes long.
pub fn add_double(&mut self, s:&'t str)
{
if s.len()==2 { self.doubles.insert(s); }
}
/// Registers a single-character symbol.
pub fn add_single(&mut self, c:char) { self.singles.insert(c);}
/// Registers a three-character symbol; ignored unless exactly 3 bytes.
pub fn add_triple(&mut self, s:&'t str)
{ if s.len()==3 {self.triples.insert(s);} }
/// Registers `s` as a priority symbol (matched before all other token
/// categories) or, if already registered, increments its remaining-use
/// count.  Empty strings are ignored.
pub fn add_priority_symbol(&mut self, s:&'static str) {
   if s.len()>0 {
      // single BTreeMap lookup via the entry API (previously a get_mut
      // followed by a second lookup in insert)
      *self.priority_symbols.entry(s).or_insert(0) += 1;
   }
}
pub fn del_priority_symbol(&mut self, s:&'static str) {
if let Some(v) = self.priority_symbols.get_mut(s) {
if (*v>1) {*v -= 1; }
else { self.priority_symbols.remove(s); }
}
}
/// Puts the tokenizer in skip_to mode: the next call to next_token
/// consumes everything up to `target` and returns it as one Skipto
/// token.
pub fn skip_to(&mut self, target:&'static str)
{self.skipend=target; self.skipbegin=""; }
/// Cancels any skip mode and restores the default EOF sentinel.
pub fn skip_reset(&mut self) {
self.skipend=""; self.skipbegin=""; self.skipcount=0;
self.specialeof = "$_RREOF_$";
}
/// Puts the tokenizer in balanced-bracket skip mode: the next call to
/// next_token consumes text until occurrences of `lbr`/`rbr` balance
/// (starting from nesting level `offset`) or `delimit` is seen, and
/// returns it as one Skipmatched token.  Empty or identical brackets
/// are rejected with a diagnostic.
pub fn skip_match(&mut self,lbr:&'static str,rbr:&'static str,offset:i32,delimit:&'static str)
{
if lbr.len()==0 || rbr.len()==0 || lbr==rbr {eprintln!("LEXICAL SCANNER ERROR: ILLEGAL SKIP_MATCH BRACKETS"); return;}
self.skipbegin = lbr;
self.skipend = rbr;
self.skipcount = offset;
self.specialeof=delimit;
}
pub fn add_custom(&mut self, tkind:&'static str, reg_expr:&str)
{
let reg = if reg_expr.starts_with('^') || reg_expr.starts_with("(?m") {reg_expr.to_owned()} else {format!("^{}",reg_expr)};
let re = Regex::new(®).expect(&format!("Error compiling custom regular expression \"{}\"",reg_expr));
self.custom_defined.push((tkind,re));
}
/// Sets the text to tokenize and resets position/line bookkeeping;
/// trailing whitespace is trimmed from the input.
pub fn set_input(&mut self, inp:&'t str)
{
self.input=inp.trim_end(); self.position=0; self.line=1; self.line_start=0;
self.line_positions = vec![0,0];
}
/// Sets the line-comment opening string (default "//").
pub fn set_line_comment(&mut self,cm:&'t str) {
self.line_comment=cm;
}
/// Sets multi-line comment delimiters from a whitespace-separated pair
/// such as "/* */"; an empty string disables multi-line comments; any
/// other malformed value is silently ignored.
pub fn set_multiline_comments(&mut self,cm:&'t str)
{
if cm.len()==0 {
self.ml_comment_start=""; self.ml_comment_end=""; return;
}
let split:Vec<_> = cm.split_whitespace().collect();
if split.len()!=2 {return;}
self.ml_comment_start = split[0].trim();
self.ml_comment_end = split[1].trim();
}
/// Current line number (1-based).
pub fn line(&self)->usize {self.line}
/// Current column (1-based), computed from position and line_start.
pub fn column(&self)->usize {
if self.position<self.line_start {1}
else {self.position-self.line_start+1}
}
/// Byte offset of the scan front.
pub fn current_position(&self)-> usize {self.position}
/// Byte offset where the most recent token started.
pub fn previous_position(&self)-> usize {self.prev_position}
/// Returns the recorded source label (e.g. a file path).
pub fn get_source(&self) -> &str {self.src}
/// Records a label (e.g. a file path) for the source being scanned.
pub fn set_source<'u:'t>(&mut self, s:&'u str) {self.src=s;}
/// Returns the text of the line currently being scanned, up to but not
/// including the next newline.
pub fn current_line(&self) -> &str
{
let mut startl = self.line_start;
if startl>self.input.len() {startl=self.input.len();}
let max = self.input.len() - startl;
let endl = self.input[startl..].find('\n').unwrap_or(max);
&self.input[startl..startl+endl]
}
/// Returns line `i` (1-based) of the input, or None if out of range.
/// The slice runs to the start of the next recorded line, so for
/// interior lines it includes the terminating newline character.
pub fn get_line(&self,i:usize) -> Option<&str>
{
if i<1 || i>=self.line_positions.len() {return None;}
let startl = self.line_positions[i];
let endl = *self.line_positions.get(i+1).unwrap_or(&self.input.len());
Some(&self.input[startl..endl])
}
/// Returns the input text between byte offsets `start` and `end`, or ""
/// for any inverted or out-of-range span.
pub fn get_slice(&self,start:usize,end:usize) -> &str
{
   if start >= end || end > self.input.len() { return ""; }
   &self.input[start..end]
}
/// Resets scan state so the same input can be tokenized again.
pub fn reset(&mut self) {
   // line numbers are 1-based everywhere else (see new/set_input), so
   // reset to 1 — the previous value of 0 was inconsistent
   self.position=0; self.prev_position=0; self.line=1; self.line_start=0;
   self.line_positions = vec![0,0];
}
/// Moves the scan position back by `offset` bytes, then binary-searches
/// line_positions to restore the line counter and truncate the recorded
/// line starts so they match the rewound position.
pub fn backtrack(&mut self, offset:usize) {
if (self.position >= offset) {self.position -= offset;}
// only rewind line bookkeeping if we moved before the last line start
if self.position < self.line_positions[self.line_positions.len()-1] {
let (mut min,mut max) = (1,self.line_positions.len()-2);
while min<=max {
let mid = (min+max)/2;
// found the line whose span contains the new position
if self.position >= self.line_positions[mid]
&& self.position<self.line_positions[mid+1] {
self.line = mid;
self.line_positions.truncate(mid+1);
break;
}
else if self.position >= self.line_positions[mid+1] { min = mid+1; }
else { max = mid-1; }
} } }
/// Returns the next token as a (RawToken, line, column) triple, or None
/// at end of input.  This is the tokenizer's main entry point: it skips
/// (or optionally returns) whitespace and comments, honors skip_to /
/// skip_match modes and priority symbols, tries user-defined regex
/// categories, then the built-in categories (registered symbols,
/// char/string literals, numbers, identifiers).  Reported columns are
/// widened by tab_spaces-1 for each tab seen on the current line.
pub fn next_token(&mut self) -> Option<(RawToken<'t>,usize,usize)>
{
let mut pi = 0;
self.prev_position = self.position;
let clen = self.line_comment.len();
let (cms,cme) = (self.ml_comment_start,self.ml_comment_end);
let mut skipping = false; // note: never read below
// each tab contributes tab_spaces-1 extra columns
let tsps = self.tab_spaces-1;
let mut string_startline =self.line;
while self.position<self.input.len()
{
pi = self.position;
let mut column0 = self.column();
let mut line0 = self.line;
let mut lstart0 = self.line_start;
let mut nextchars = self.input[pi..].chars();
let mut c = nextchars.next().unwrap();
let mut i = pi;
// --- consume a run of whitespace, tracking newlines and tabs ---
let mut tabs = 0; while c.is_whitespace() && i < self.input.len()
{
if c=='\n' {
self.line+=1; lstart0=self.line_start; self.line_start=i+1; line0=self.line; self.linetabs=0;
self.line_positions.push(i+1);
if self.keep_newline { self.position = i+1; return Some((Newline,self.line-1,(self.linetabs*tsps)+pi-lstart0+1)); }
} else if c=='\t' { tabs+=1; self.linetabs+=1; }
i+= 1;
if i<self.input.len() {c = nextchars.next().unwrap();}
} self.position = i;
if (i>pi && self.keep_whitespace) {
return Some((Whitespace(tabs*tsps+(i-pi)),line0,(self.linetabs*tsps)+self.column()-(i-pi)));}
else if i>pi {continue;}
// --- skip_match mode: scan for balanced skipbegin/skipend brackets ---
if self.skipbegin.len()!=0 && self.skipend.len()!=0 {
if self.skipcount==0 && !self.input.starts_with(self.skipbegin) {continue;}
let (llen,rlen) = (self.skipbegin.len(), self.skipend.len());
let mut counter = self.skipcount; let mut stringmode = false;
let mut ci = pi;
loop { if ci>=self.input.len() {break;}
// the special EOF sentinel force-ends the skip region
else if !stringmode && self.specialeof.len()!=0 && self.input[ci..].starts_with(self.specialeof) { ci=self.input.len(); break; }
if self.input[ci..].starts_with(self.skipbegin) {
counter+=1; ci+=llen;
}
else if !stringmode && self.input[ci..].starts_with(self.skipend) {
counter-=1; ci +=rlen;
if counter==0 {break;}
}
else if &self.input[ci..ci+1]=="\n" {
self.line+=1; ci += 1; self.line_start=ci;
self.linetabs=0;
self.line_positions.push(ci);
}
// quotes toggle string mode, inside which brackets are not counted
else if &self.input[ci..ci+1]=="\"" {
ci +=1; stringmode=!stringmode;
}
// NOTE(review): ci is not advanced in this branch, so a '\t' inside a
// skip_match region that is not part of a bracket string appears to
// loop forever — confirm whether this case can occur
else if &self.input[ci..ci+1]=="\t" {self.linetabs+=1;}
else { ci += 1; }
} self.skip_reset();
if ci>=self.input.len() { continue; } self.position = ci;
let poss = if (self.linetabs*tsps)+pi+1>=lstart0 {(self.linetabs*tsps)+pi+1-lstart0} else {0};
return Some((Skipmatched(&self.input[pi..ci]),line0,poss));
// --- skip_to mode: everything up to skipend becomes one Skipto token ---
} else if self.skipend.len()!=0 { let endpos;
if self.skipend!=self.specialeof {
let findend = self.input[pi..].rfind(self.skipend);
endpos = findend.unwrap_or(self.input.len());
} else {endpos = self.input.len();}
if endpos<self.input.len() {
self.position = pi+self.skipbegin.len()+endpos+self.skipend.len();
} else {
if self.skipend==self.specialeof {
self.position = endpos;
return Some((Skipto(&self.input[pi..]),line0,(self.linetabs*tsps)+(1+pi)-lstart0));
}
self.position = pi;
continue;
} let mut ci = pi;
// record line boundaries inside the skipped text
while let Some(nli) = self.input[ci..self.position].find('\n')
{
self.line+=1; ci += nli+1; self.line_start=ci;
self.line_positions.push(ci);
}
return Some((Skipto(&self.input[pi..self.position]),line0,(self.linetabs*tsps)+pi-lstart0+1));
}
// --- priority symbols: matched before all other categories ---
let mut psretval = None;
for (s,sc) in self.priority_symbols.iter() { if (*sc>0) {
let slen = s.len();
if pi+slen<=self.input.len() && *s==&self.input[pi..pi+slen] {
self.position = pi+slen;
psretval = Some((Symbol(s),self.line,(self.linetabs*tsps)+self.column()-slen));
break;
}
} } if let Some((Symbol(s),_,_)) = &psretval {
// each match consumes one "use" of the priority symbol
self.del_priority_symbol(*s);
return psretval;
}
// --- user-defined (custom regex) token categories ---
for (ckey,cregex) in self.custom_defined.iter()
{
if let Some(mat) = cregex.find(&self.input[pi..]) {
self.position = mat.end()+pi;
let rawtext = &self.input[pi..self.position];
let oldline = self.line; let oldstart = self.line_start;
// account for any newlines inside the matched text
let endls:Vec<_>=rawtext.match_indices('\n').collect();
for (x,y) in &endls
{
self.line+=1;
self.line_start += x+1;
self.line_positions.push(self.line_start);
self.linetabs=0;
} let pos9 = if (self.linetabs*tsps)+pi>oldstart {(self.linetabs*tsps)+pi-oldstart} else {0};
return Some((Custom(ckey,rawtext),oldline,1+pos9));
} }
// --- single-line comments ---
if clen>0 && pi+clen<=self.input.len() && self.input.is_char_boundary(pi+clen) && self.line_comment==&self.input[pi..pi+clen] {
if let Some(nlpos) = self.input[pi+clen..].find("\n") {
self.position = nlpos+pi+clen;
if self.keep_comment {
return Some((Verbatim(&self.input[pi..pi+clen+nlpos]),self.line,(self.linetabs*tsps)+1+pi-self.line_start));
}
else {continue;}
} else { self.position = self.input.len();
if self.keep_comment {return Some((Verbatim(&self.input[pi..]),self.line,(self.linetabs*tsps)+1+pi-self.line_start));}
else {break;}
}
}
// --- multi-line comments ---
if cms.len()>0 && pi+cms.len()<=self.input.len() && &self.input[pi..pi+cms.len()] == cms {
if let Some(endpos) = self.input[pi+cms.len()..].find(cme) {
self.position = pi+cms.len()+endpos+cme.len();
} else {
self.position = self.input.len();
eprintln!("Tokenizer error: unclosed multi-line comment starting on line {}, column {}",line0,pi-self.line_start+1);
return Some((LexError,line0,(self.linetabs*tsps)+1+pi-self.line_start));
}
let mut ci = pi;
// record line boundaries inside the comment
while let Some(nli) = self.input[ci..self.position].find('\n')
{
self.line+=1; ci += nli+1; self.line_start=ci; self.linetabs=0;
self.line_positions.push(ci);
}
if self.keep_comment {
return Some((Verbatim(&self.input[pi..self.position]),line0,(self.linetabs*tsps)+1+pi-lstart0));
}
else {continue;}
}
// --- registered 3-, 2- and 1-character symbols, longest first ---
if self.triples.len()>0 && pi+2<self.input.len() && self.triples.contains(&self.input[pi..pi+3]) {
self.position = pi+3;
return Some((Symbol(&self.input[pi..pi+3]),self.line,(self.linetabs*tsps)+self.column()-3));
}
if pi+1<self.input.len() && self.doubles.contains(&self.input[pi..pi+2]) {
self.position = pi+2;
return Some((Symbol(&self.input[pi..pi+2]),self.line,(self.linetabs*tsps)+self.column()-2));
}
if self.singles.contains(&c) {
self.position=pi+1;
return Some((Symbol(&self.input[pi..pi+1]),self.line,(self.linetabs*tsps)+self.column()-1));
}
// --- character literal of the form 'c' ---
if c=='\'' && pi+2<self.input.len() && &self.input[pi+2..pi+3]=="\'" {
self.position = pi+3;
let thechar = self.input[pi+1..pi+2].chars().next().unwrap();
return Some((Char(thechar),self.line,(self.linetabs*tsps)+self.column()-3));
}
// --- string literal: backslash escapes the following character ---
if c=='\"' {
string_startline = self.line;
let mut ci = pi+1;
while ci<self.input.len()
{
if &self.input[ci..ci+1]=="\"" {
self.position = ci+1;
return Some((Strlit(&self.input[pi..self.position]),line0,(self.linetabs*tsps)+pi-lstart0+1));
}
else if &self.input[ci..ci+1] == "\n" {
if !self.allow_newline_in_string {
eprintln!("Tokenizer Error: unclosed string line {} (allow_newline_in_string option set to false)",line0);
return None;
} else {
self.line+=1; self.line_start=ci+1; self.linetabs=0;
self.line_positions.push(self.line_start);
}
} else if &self.input[ci..ci+1] == "\\" {ci+=1;} else if &self.input[ci..ci+1] == "\t" {self.linetabs+=1;}
ci+=1;
} self.position = self.input.len();
eprintln!("Tokenizer Error: unclosed string, line {}, possibly starting on line {}",line0,string_startline);
let errposition = if (lstart0-1)<pi {1+pi-lstart0} else {0}; // note: computed but not used
return Some((LexError,line0,(self.linetabs*tsps)+pi-lstart0+1));
}
// --- numeric and identifier categories, tried in fixed order: hex
// first, then identifiers, floats and decimal integers; numbers that
// overflow i64/f64 are returned verbatim as BigNumber ---
if let Some(mat) = self.hexnum.find(&self.input[pi..]) {
self.position = mat.end()+pi;
let tryparse = i64::from_str_radix(&self.input[pi+2..self.position],16);
if let Ok(hn) = tryparse {return Some((Num(hn),self.line,(self.linetabs*tsps)+pi+3-self.line_start));}
else {return Some((BigNumber(&self.input[pi..self.position]),self.line,(self.linetabs*tsps)+pi-self.line_start+1));}
} if let Some(mat) = self.alphan.find(&self.input[pi..]) {
self.position = mat.end()+pi;
let pos9 = if (self.linetabs*tsps)+pi<self.line_start {0} else {(self.linetabs*tsps)+pi-self.line_start};
return Some((Alphanum(&self.input[pi..self.position]),self.line,pos9+1));
} if let Some(mat) = self.floatp.find(&self.input[pi..]) {
self.position = mat.end()+pi;
let tryparse = self.input[pi..self.position].parse::<f64>();
if let Ok(n)=tryparse {return Some((Float(n),self.line,(self.linetabs*tsps)+(pi+1)-self.line_start));}
else {return Some((BigNumber(&self.input[pi..self.position]),self.line,(self.linetabs*tsps)+pi-self.line_start+1));}
} if let Some(mat) = self.decuint.find(&self.input[pi..]) {
self.position = mat.end()+pi;
let tryparse = self.input[pi..self.position].parse::<i64>();
if let Ok(n)=tryparse {return Some((Num(n),self.line,(self.linetabs*tsps)+1+pi-self.line_start));}
else {return Some((BigNumber(&self.input[pi..self.position]),self.line,(self.linetabs*tsps)+1+pi-self.line_start));}
}
// --- a '"' reaching this point means an unclosed string ---
if pi<self.input.len() && &self.input[pi..pi+1]=="\"" {
self.position = self.input.len();
eprintln!("Tokenizer error: unclosed string starting on line {}, column {}",line0,pi-self.line_start+1);
return Some((LexError,line0,(self.linetabs*tsps)+1+pi-self.line_start));
}
// --- maximal run of non-alphanumeric symbol characters ---
if let Some(mat) = self.nonalph.find(&self.input[pi..]) {
self.position = mat.end()+pi;
return Some((Symbol(&self.input[pi..self.position]),self.line,(self.linetabs*tsps)+1+pi-self.line_start));
}
// --- nothing matched: emit LexError and stop scanning ---
self.position = self.input.len();
if pi<self.position {
eprintln!("Tokenizer Error: unrecognized symbols starting on line {}, column {}",line0,pi-self.line_start+1);
return Some((LexError,line0,(self.linetabs*tsps)+1+pi-self.line_start));
}
} return None;
}
}
impl<'t> Iterator for StrTokenizer<'t>
{
  type Item = (RawToken<'t>,usize,usize);
  /// Yields the next (token, line, column) triple by delegating to
  /// [StrTokenizer::next_token].
  fn next(&mut self) -> Option<(RawToken<'t>,usize,usize)>
  {
     // previously `if let Some(tok) = ... {Some(tok)} else {None}` — a
     // no-op re-wrap of the Option
     self.next_token()
  }
}
/// A source of text to tokenize: a path label plus the full contents,
/// with an optional bump-allocation arena.
pub struct LexSource<'t>
{
pathname:&'t str,   // path the text was read from ("stdin" for reader-based sources)
contents:String,    // entire text of the source
bump:Option<Bump>,  // optional arena, created by the *_bump constructors
}
impl<'t> LexSource<'t>
{
/// Creates a LexSource by reading the entire file at `path` into
/// memory; prints a diagnostic and returns the underlying error on
/// failure.
pub fn new(path:&'t str) -> std::io::Result<LexSource<'t>>
{
let tryread=std::fs::read_to_string(path);
match tryread {
Ok(st) => {
Ok(LexSource {
pathname:path,
contents:st,
bump:None,  // no arena unless requested via with_bump
})
},
Err(e) => {
eprintln!("\nFAILED TO OPEN PATH TO SOURCE '{}'\n",path);
Err(e)
},
} }
pub fn from_file(path:&'t str) -> std::io::Result<LexSource<'t>> {
LexSource::new(path)
}
fn from_stdin_i(b:bool) -> Self {
let mut strbuf = String::new();
let stdin = io::stdin();
for ln in stdin.lines() {
let rr=ln.map(|x|{strbuf.push_str(&x); strbuf.push_str("\r\n");});
} LexSource {
pathname: "stdin",
contents:strbuf,
bump:if b {Some(Bump::new())} else {None},
}
}
/// Private helper: reads all lines from `br` into a LexSource; `b`
/// selects whether a bump arena is attached.  Line endings are
/// normalized to "\r\n".
fn from_bufread_i<B:io::BufRead>(br:B, b:bool) -> Self {
    let mut strbuf = String::new();
    // best-effort: lines that fail to read are skipped and reading
    // continues, matching the original behavior of ignoring Err results
    for ln in br.lines() {
        if let Ok(line) = ln {
            strbuf.push_str(&line);
            strbuf.push_str("\r\n");
        }
    }
    LexSource {
        pathname: "stdin",  // NOTE(review): label is "stdin" even for arbitrary readers
        contents: strbuf,
        bump: if b {Some(Bump::new())} else {None},
    }
}
/// Like [LexSource::new], but also attaches a bump-allocation arena to
/// the source.
pub fn with_bump(path:&'t str) -> std::io::Result<LexSource<'t>>
{
let tryread=std::fs::read_to_string(path);
match tryread {
Ok(st) => {
let newsource = LexSource {
pathname:path,
contents:st,
bump:Some(Bump::new()),
};
Ok(newsource)
},
Err(e) => {
eprintln!("\nFAILED TO OPEN PATH TO SOURCE '{}'\n",path);
Err(e)
},
} }
/// Reads all of standard input (no bump arena).
pub fn from_stdin() -> Self {
Self::from_stdin_i(false)
}
/// Reads all of standard input, with a bump arena attached.
pub fn from_stdin_bump() -> Self {
Self::from_stdin_i(true)
}
/// Reads all lines from any BufRead source (no bump arena).
pub fn from_bufread(br:impl io::BufRead) -> Self {
Self::from_bufread_i(br,false)
}
/// Reads all lines from any BufRead source, with a bump arena attached.
pub fn from_bufread_bump(br:impl io::BufRead) -> Self {
Self::from_bufread_i(br,true)
}
/// Returns the bump arena, if one was created for this source.
pub fn get_bump(&self) -> Option<&Bump> {
self.bump.as_ref()
}
/// Returns the full text of the source.
pub fn get_contents(&self)->&str {&self.contents}
/// Returns the path (or "stdin") this source was read from.
pub fn get_path(&self)->&str {self.pathname}
}
impl<'t> StrTokenizer<'t>
{
/// Creates a tokenizer over the contents of `ls`, recording its path as
/// the source label.
pub fn from_source(ls:&'t LexSource<'t>) ->StrTokenizer<'t>
{
let mut stk = StrTokenizer::new();
stk.set_source(ls.get_path());
stk.set_input(ls.contents.as_str());
// best-effort pre-allocation of roughly one line per 40 input bytes;
// allocation failure is deliberately ignored
let res=stk.line_positions.try_reserve(stk.input.len()/40);
stk
}
/// Creates a tokenizer directly over the string `s` (no source label).
pub fn from_str(s:&'t str) -> StrTokenizer<'t>
{
    let mut tokenizer = StrTokenizer::new();
    tokenizer.set_input(s);
    tokenizer
}
}
/// Minimal built-in [Tokenizer] that emits one token per non-whitespace
/// character (or per character, if keep_ws is set).
pub struct charscanner<'t>
{
contents: &'t str,  // the text being scanned
index: usize,       // byte position of the scan front
line:usize,         // current line (1-based)
keep_ws: bool, pub modify: fn(&'t str)->&'t str  // keep whitespace?; transforms each token's text
}
impl<'t> charscanner<'t>
{
   /// Creates a scanner over `input`; `kws` selects whether whitespace
   /// characters are returned as tokens.  The `modify` transformer
   /// defaults to the identity function.
   pub fn new(input:&'t str, kws:bool) -> charscanner<'t>
   {
      charscanner { contents: input, index: 0, line: 1, keep_ws: kws, modify: |text| text }
   }
}
impl<'t, AT:Default> Tokenizer<'t,AT> for charscanner<'t>
{
/// Returns the next single-character token (text produced by `modify`),
/// skipping whitespace unless keep_ws is set.
/// NOTE(review): the `[i..i+1]` slicing assumes single-byte (ASCII)
/// characters; a multi-byte UTF-8 character here would panic — confirm
/// inputs are ASCII.
fn nextsym(&mut self) -> Option<TerminalToken<'t,AT>>
{
let mut res = None;
let mut stop = false;
let mut i = self.index;
while !stop && i<self.contents.len()
{
let c = self.contents[i..i+1].chars().next().unwrap();
if c=='\n' {self.line+=1;}
// skip whitespace unless it is to be returned as a token
if c.is_whitespace() && !self.keep_ws {
i+=1; continue;
}
else if c.is_whitespace() && self.keep_ws {
stop = true;
res = Some(TerminalToken::new(&self.contents[i..i+1],AT::default(),self.line,i));
}
else {
stop=true;
let mc = (self.modify)(&self.contents[i..i+1]);
res =Some(TerminalToken::new(mc,AT::default(),self.line,i));
}
} self.index = i+1;  // consume the character just returned
return res;
} fn linenum(&self) -> usize { self.line }
/// NOTE(review): returns the byte index, not a per-line column
fn column(&self) -> usize { self.index }
fn current_line(&self) -> &str {self.contents}
}
/// Binary search over the positions in ps[1..]: returns (index,
/// ps[index]) for the entry with ps[index] <= p and (for interior
/// entries) p < ps[index+1]; returns (0,0) when no entry qualifies.
fn brsearch(ps:&[usize], p:usize) -> (usize,usize) {
    let mut lo = 1;
    let mut hi = ps.len();
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        if ps[mid] > p {
            hi = mid;                // answer lies strictly below mid
        } else if mid + 1 == ps.len() || p < ps[mid + 1] {
            return (mid, ps[mid]);   // ps[mid] <= p < ps[mid+1] (or mid is last)
        } else {
            lo = mid + 1;            // answer lies above mid
        }
    }
    (0, 0)
}