use std;
use abortable_parser::combinators::*;
use abortable_parser::iter::SliceIter;
use abortable_parser::{Error, Result};
use crate::ast::*;
use crate::error::BuildError;
use crate::iter::OffsetStrIter;
/// A run of comment tokens that `tokenize` collected together before flushing them into a `CommentMap`.
pub type CommentGroup = Vec<Token>;
/// Comment groups keyed by a line number; `tokenize` keys each group by the line of its last comment token.
pub type CommentMap = std::collections::BTreeMap<usize, CommentGroup>;
/// Accepts a single byte when it may appear inside a bare word.
///
/// A byte qualifies if it is ASCII alphanumeric, `-` or `_`. On success the
/// iterator positioned after that byte is returned together with the byte.
/// Fails when the input is exhausted or the byte does not qualify; in both
/// cases the error context is the iterator state after the read attempt.
fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, u8> {
    let mut cursor = i.clone();
    let byte = if let Some(b) = cursor.next() {
        *b
    } else {
        return Result::Fail(Error::new(
            "Unexpected End of Input".to_string(),
            Box::new(cursor.clone()),
        ));
    };
    let accepted = byte.is_ascii_alphanumeric() || byte == b'-' || byte == b'_';
    if !accepted {
        return Result::Fail(Error::new(
            "Not a symbol character".to_string(),
            Box::new(cursor.clone()),
        ));
    }
    Result::Complete(cursor, byte)
}
/// Parses the body of a double-quoted string, starting just after the opening quote.
///
/// Consumes bytes up to and including the first unescaped `"` and yields the
/// unescaped contents. `\n`, `\r` and `\t` become newline, carriage return and
/// tab; any other `\<c>` collapses to `<c>`, so `\"` yields `"` and `\\` yields `\`.
///
/// Returns `Result::Incomplete` when the input ends before a closing quote.
fn escapequoted<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, String> {
    // Collect raw bytes and decode once at the end. The iterator yields `u8`
    // values (presumably the UTF-8 bytes of the source text); pushing
    // `byte as char` would turn every multi-byte character into several
    // Latin-1 characters.
    let mut frag: Vec<u8> = Vec::new();
    let mut escape = false;
    let mut _input = input.clone();
    while let Some(&c) = _input.next() {
        if escape {
            // The byte after a backslash is always taken literally, apart
            // from the three recognised control escapes.
            escape = false;
            frag.push(match c {
                b'n' => b'\n',
                b'r' => b'\r',
                b't' => b'\t',
                other => other,
            });
        } else if c == b'\\' {
            escape = true;
        } else if c == b'"' {
            // Lossy decoding keeps this infallible even if the byte source
            // ever hands us something that is not valid UTF-8.
            let text = String::from_utf8_lossy(&frag).into_owned();
            return Result::Complete(_input, text);
        } else {
            frag.push(c);
        }
    }
    // Ran out of input without seeing the closing quote.
    Result::Incomplete(_input.clone())
}
// Matches a double-quoted string literal: an opening `"` followed by
// `escapequoted`, which reads through the closing quote and yields the
// unescaped contents. Produces a QUOTED token whose position comes from the
// input captured before the opening quote.
make_fn!(strtok<OffsetStrIter, Token>,
do_each!(
span => input!(),
_ => text_token!("\""),
frag => escapequoted,
(Token{
typ: TokenType::QUOTED,
pos: Position::from(&span),
fragment: frag.to_string(),
})
)
);
// Matches a bare word (identifier). The `peek!(ascii_alpha)` step requires
// the word to start with an ASCII letter (assumes `peek!` checks without
// consuming — confirm against abortable_parser); the word itself is every
// following byte accepted by `is_symbol_char`. Produces a BAREWORD token.
make_fn!(barewordtok<OffsetStrIter, Token>,
do_each!(
span => input!(),
_ => peek!(ascii_alpha),
frag => consume_all!(is_symbol_char),
(Token{
typ: TokenType::BAREWORD,
pos: Position::from(&span),
fragment: frag.to_string(),
})
)
);
// Matches a run of ASCII digits. The `peek!(ascii_digit)` step requires at
// least one digit to be present before `consume_all!` gathers the run.
// Produces a DIGIT token whose fragment is the digit text.
make_fn!(digittok<OffsetStrIter, Token>,
do_each!(
span => input!(),
_ => peek!(ascii_digit),
digits => consume_all!(ascii_digit),
(Token{
typ: TokenType::DIGIT,
pos: Position::from(&span),
fragment: digits.to_string(),
})
)
);
// Matches the literal text `true` or `false` and produces a BOOLEAN token.
// Nothing is checked about the byte that follows the literal.
make_fn!(booleantok<OffsetStrIter, Token>,
do_each!(
span => input!(),
token => either!(
text_token!("true"),
text_token!("false")
),
(Token{
typ: TokenType::BOOLEAN,
pos: Position::from(&span),
fragment: token.to_string(),
})
)
);
/// Builds a parser body that matches a fixed piece of text and wraps it in a
/// `Token` of the given type, positioned at the input captured before the text.
///
/// * `do_text_token_tok!(i, type, text, WS)` additionally requires the text to
///   be followed by whitespace or a comment. The follower's result is discarded.
/// * `do_text_token_tok!(i, type, text)` matches the text alone.
macro_rules! do_text_token_tok {
// Keyword form: the text must be followed by whitespace or a comment.
($i:expr, $type:expr, $text_token:expr, WS) => {
do_each!($i,
span => input!(),
frag => text_token!($text_token),
_ => either!(whitespace, comment),
(Token {
typ: $type,
pos: Position::from(&span),
fragment: frag.to_string(),
})
)
};
// Plain form: only the text itself is matched.
($i:expr, $type:expr, $text_token:expr) => {
do_each!($i,
span => input!(),
frag => text_token!($text_token),
(Token {
typ: $type,
pos: Position::from(&span),
fragment: frag.to_string(),
})
)
};
}
// Fixed-text token parsers. Each one matches exactly the quoted text (no
// trailing whitespace is required) and yields a token of the stated type.

// The `NULL` literal, tagged EMPTY.
make_fn!(emptytok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::EMPTY, "NULL")
);
// Punctuation and operator tokens, all tagged PUNCT.
make_fn!(commatok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ",")
);
make_fn!(lbracetok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "{")
);
make_fn!(rbracetok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "}")
);
make_fn!(lparentok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "(")
);
make_fn!(rparentok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ")")
);
make_fn!(dottok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ".")
);
make_fn!(plustok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "+")
);
make_fn!(dashtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "-")
);
make_fn!(startok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "*")
);
make_fn!(slashtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "/")
);
// Two-character `%%`; shares its first character with `pcttok` below.
make_fn!(modulustok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "%%")
);
make_fn!(pcttok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "%")
);
make_fn!(eqeqtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "==")
);
make_fn!(notequaltok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "!=")
);
make_fn!(matchtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "~")
);
make_fn!(notmatchtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "!~")
);
make_fn!(gttok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ">")
);
make_fn!(gtequaltok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ">=")
);
make_fn!(ltequaltok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "<=")
);
make_fn!(lttok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "<")
);
make_fn!(equaltok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "=")
);
make_fn!(semicolontok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ";")
);
make_fn!(doublecolontok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "::")
);
make_fn!(colontok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ":")
);
make_fn!(leftsquarebracket<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "[")
);
make_fn!(rightsquarebracket<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "]")
);
make_fn!(fatcommatok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "=>")
);
make_fn!(andtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "&&")
);
make_fn!(ortok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "||")
);
// Keyword token parsers. Each uses the `WS` form of `do_text_token_tok!`, so
// the keyword text must be followed by whitespace or a comment to match.
// The resulting tokens are tagged BAREWORD and carry the keyword as fragment.
make_fn!(selecttok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "select", WS)
);
make_fn!(intok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "in", WS)
);
make_fn!(istok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "is", WS)
);
make_fn!(nottok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "not", WS)
);
make_fn!(tracetok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "TRACE", WS)
);
make_fn!(failtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "fail", WS)
);
make_fn!(functok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "func", WS)
);
make_fn!(moduletok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "module", WS)
);
make_fn!(lettok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "let", WS)
);
make_fn!(importtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "import", WS)
);
make_fn!(includetok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "include", WS)
);
make_fn!(asserttok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "assert", WS)
);
make_fn!(outtok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "out", WS)
);
make_fn!(converttok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "convert", WS)
);
make_fn!(astok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "as", WS)
);
make_fn!(maptok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "map", WS)
);
make_fn!(filtertok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "filter", WS)
);
make_fn!(reducetok<OffsetStrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "reduce", WS)
);
fn comment(input: OffsetStrIter) -> Result<OffsetStrIter, Token> {
match text_token!(input, "//") {
Result::Complete(rest, _) => {
match until!(
rest,
either!(
eoi,
discard!(text_token!("\r\n")),
discard!(text_token!("\n"))
)
) {
Result::Complete(rest, cmt) => {
let rest =
match optional!(rest, either!(text_token!("\r\n"), text_token!("\n"))) {
Result::Complete(next_rest, _) => next_rest,
_ => rest,
};
return Result::Complete(rest, make_tok!(CMT => cmt.to_string(), input));
}
_ => {
return Result::Abort(Error::new(
"Unparsable comment".to_string(),
Box::new(rest.clone()),
));
}
}
}
Result::Incomplete(ctx) => return Result::Incomplete(ctx),
Result::Fail(e) => return Result::Fail(e),
Result::Abort(e) => return Result::Abort(e),
}
}
// Matches a run of ASCII whitespace. The `peek!(ascii_ws)` step requires at
// least one whitespace byte before `repeat!` takes the run. Produces a WS
// token with an empty fragment; the whitespace text itself is not kept.
make_fn!(whitespace<OffsetStrIter, Token>,
do_each!(
span => input!(),
_ => peek!(ascii_ws),
_ => repeat!(ascii_ws),
(Token{
typ: TokenType::WS,
pos: Position::from(&span),
fragment: String::new(),
})
)
);
// Matches only at the end of input (via `eoi`) and produces an END token
// with an empty fragment, positioned at the current input.
make_fn!(end_of_input<OffsetStrIter, Token>,
do_each!(
span => input!(),
_ => eoi,
(Token{
typ: TokenType::END,
pos: Position::from(&span),
fragment: String::new(),
})
)
);
/// Parses a single token from `input` by trying each token parser in turn.
///
/// The alternatives are listed in a deliberate order: where two tokens share
/// a prefix, the longer or more specific one is listed before the shorter one
/// (presumably `either!` takes the first alternative that matches — confirm
/// against abortable_parser). Do not reorder without checking every shared
/// prefix.
fn token<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, Token> {
either!(
input,
strtok,
emptytok,
digittok,
commatok,
rbracetok,
lbracetok,
lparentok,
rparentok,
dottok,
andtok,
ortok,
plustok,
dashtok,
startok,
// `comment` ("//") is listed before `slashtok` ("/").
comment,
slashtok,
// "%%" is listed before "%".
modulustok,
pcttok,
eqeqtok,
notequaltok,
matchtok,
notmatchtok,
// ">=" and "<=" are listed before ">" and "<".
complete!("Not >=".to_string(), gtequaltok),
complete!("Not <=".to_string(), ltequaltok),
gttok,
lttok,
// "=>" (and "==" above) are listed before "=".
fatcommatok,
equaltok,
semicolontok,
// "::" is listed before ":".
doublecolontok,
colontok,
leftsquarebracket,
rightsquarebracket,
booleantok,
// Keywords are listed before the generic bare word parser.
intok,
istok,
nottok,
lettok,
outtok,
converttok,
selecttok,
asserttok,
failtok,
tracetok,
functok,
moduletok,
importtok,
includetok,
astok,
maptok,
filtertok,
reducetok,
barewordtok,
whitespace,
end_of_input
)
}
pub fn tokenize<'a>(
input: OffsetStrIter<'a>,
mut comment_map: Option<&mut CommentMap>,
) -> std::result::Result<Vec<Token>, BuildError> {
let mut out = Vec::new();
let mut i = input.clone();
let mut comment_group = Vec::new();
let mut comment_was_last: Option<Token> = None;
loop {
if let Result::Complete(_, _) = eoi(i.clone()) {
break;
}
match token(i.clone()) {
Result::Abort(e) => {
return Err(BuildError::from(e));
}
Result::Fail(e) => {
return Err(BuildError::from(e));
}
Result::Incomplete(_offset) => {
let err =
abortable_parser::Error::new("Invalid Token encountered", Box::new(i.clone()));
return Err(BuildError::from(err));
}
Result::Complete(rest, tok) => {
i = rest;
match (&mut comment_map, &tok.typ) {
(&mut Some(_), &TokenType::COMMENT) => {
comment_group.push(tok.clone());
comment_was_last = Some(tok.clone());
continue;
}
(&mut Some(ref mut map), _) => {
if tok.typ != TokenType::WS {
out.push(tok);
}
if let Some(tok) = comment_was_last {
map.insert(tok.pos.line, comment_group);
comment_group = Vec::new();
}
}
(None, TokenType::WS) | (None, TokenType::COMMENT) => continue,
(None, _) => {
out.push(tok);
}
}
comment_was_last = None;
}
}
}
if let Some(ref mut map) = comment_map {
if let Some(ref tok) = comment_group.last() {
let line = tok.pos.line;
map.insert(line, comment_group);
}
}
out.push(Token {
fragment: String::new(),
typ: TokenType::END,
pos: Position::from(&i),
});
Ok(out)
}
pub fn token_clone(t: &Token) -> std::result::Result<Token, Error<SliceIter<Token>>> {
Ok(t.clone())
}
/// Matches the next token of a token iterator by its `TokenType`.
///
/// Forms, for each `KIND` in BOOLEAN, COMMENT, BAREWORD, EMPTY, STR, DIGIT, PUNCT:
///
/// * `match_type!(i, KIND)` — yields a clone of the token (via `token_clone`).
/// * `match_type!(i, KIND => handler)` — passes the token to `handler`, which
///   returns a `std::result::Result`; `Ok(v)` becomes the parse output and
///   `Err(e)` becomes a parse failure caused by `e`.
///
/// Every mismatch is reported as `Result::Fail`, never `Abort`.
macro_rules! match_type {
($i:expr,BOOLEAN => $h:expr) => {
match_type!($i, TokenType::BOOLEAN, "Not a Boolean", $h)
};
($i:expr,BOOLEAN) => {
match_type!($i, BOOLEAN => token_clone)
};
($i:expr,COMMENT => $h:expr) => {
match_type!($i, TokenType::COMMENT, "Not a Comment", $h)
};
($i:expr,COMMENT) => {
match_type!($i, COMMENT => token_clone)
};
($i:expr,BAREWORD => $h:expr) => {
match_type!($i, TokenType::BAREWORD, "Not a Bareword", $h)
};
($i:expr,BAREWORD) => {
match_type!($i, BAREWORD => token_clone)
};
($i:expr,EMPTY => $h:expr) => {
match_type!($i, TokenType::EMPTY, "Not NULL", $h)
};
($i:expr,EMPTY) => {
match_type!($i, EMPTY => token_clone)
};
// STR maps onto the QUOTED token type.
($i:expr,STR => $h:expr) => {
match_type!($i, TokenType::QUOTED, "Not a String", $h)
};
($i:expr,STR) => {
match_type!($i, STR => token_clone)
};
($i:expr,DIGIT => $h:expr) => {
match_type!($i, TokenType::DIGIT, "Not a DIGIT", $h)
};
($i:expr,DIGIT) => {
match_type!($i, DIGIT => token_clone)
};
($i:expr,PUNCT => $h:expr) => {
match_type!($i, TokenType::PUNCT, "Not PUNCTUATION", $h)
};
($i:expr,PUNCT) => {
match_type!($i, PUNCT => token_clone)
};
// Generic form: input, expected token type, failure message, handler.
($i:expr, $t:expr, $msg:expr, $h:expr) => {{
use abortable_parser::combinators::eoi;
use abortable_parser::{Error, Result};
use std;
// Work on a clone so the caller's iterator is only advanced on success.
let mut _i = $i.clone();
if eoi(_i.clone()).is_complete() {
Result::Fail(Error::new(format!("End of Input! {}", $msg), Box::new(_i)))
} else {
match _i.next() {
Some(tok) => {
if tok.typ == $t {
match $h(tok) {
std::result::Result::Ok(v) => Result::Complete(_i.clone(), v),
std::result::Result::Err(e) => {
Result::Fail(Error::caused_by($msg, Box::new(e), Box::new(_i)))
}
}
} else {
// Wrong type: report against the original, unadvanced input.
Result::Fail(Error::new($msg.to_string(), Box::new($i)))
}
}
None => Result::Fail(Error::new($msg.to_string(), Box::new($i))),
}
}
}};
}
/// Matches the next token of a token iterator by type AND exact fragment text.
///
/// Forms, for `KIND` in PUNCT and BAREWORD:
///
/// * `match_token!(i, KIND => "text")` — yields a clone of the token (via
///   `crate::tokenizer::token_clone`).
/// * `match_token!(i, KIND => "text", handler)` — passes the token to
///   `handler`, which returns a `std::result::Result`; `Ok(v)` becomes the
///   parse output and `Err(e)` becomes a parse failure caused by `e`.
///
/// A token of the wrong type or text fails with
/// `Expected <description> but got (<actual fragment>)`, reported against the
/// unadvanced input. An exhausted iterator fails with
/// `Unexpected End Of Input`. Every mismatch is `Result::Fail`, never `Abort`.
macro_rules! match_token {
    ($i:expr,PUNCT => $f:expr) => {{
        use crate::tokenizer::token_clone;
        match_token!($i, PUNCT => $f, token_clone)
    }};
    ($i:expr,PUNCT => $f:expr, $h:expr) => {
        match_token!($i, TokenType::PUNCT, $f, format!("({})", $f), $h)
    };
    ($i:expr,BAREWORD => $f:expr) => {{
        use crate::tokenizer::token_clone;
        match_token!($i, BAREWORD => $f, token_clone)
    }};
    ($i:expr,BAREWORD => $f:expr, $h:expr) => {
        match_token!(
            $i,
            TokenType::BAREWORD,
            $f,
            // Only describe what was expected here: the generic arm below
            // adds the surrounding "Expected ... but got (...)" text itself.
            format!("BAREWORD ({})", $f),
            $h
        )
    };
    // Generic form: input, expected type, expected fragment, description of
    // the expectation, handler.
    ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{
        // Import `Error` here as well so the expansion does not depend on the
        // call site having it in scope (mirrors `match_type!`).
        use abortable_parser::{Error, Result};
        use std;
        // Work on a clone so the caller's iterator is only advanced on success.
        let mut i_ = $i.clone();
        let tok = i_.next();
        if let Some(tok) = tok {
            if tok.typ == $t && &tok.fragment == $f {
                match $h(tok) {
                    std::result::Result::Ok(v) => Result::Complete(i_.clone(), v),
                    std::result::Result::Err(e) => {
                        Result::Fail(Error::caused_by($msg, Box::new(e), Box::new(i_)))
                    }
                }
            } else {
                Result::Fail(Error::new(
                    format!("Expected {} but got ({})", $msg, tok.fragment),
                    Box::new($i.clone()),
                ))
            }
        } else {
            Result::Fail(Error::new("Unexpected End Of Input", Box::new(i_)))
        }
    }};
}
/// Shorthand for `match_token!(i, PUNCT => c)`: matches a PUNCT token whose fragment equals `c`.
macro_rules! punct {
($i:expr, $c:expr) => {
match_token!($i, PUNCT => $c)
};
}
/// Shorthand for `match_token!(i, BAREWORD => w)`: matches a BAREWORD token whose fragment equals `w`.
macro_rules! word {
($i:expr, $w:expr) => {
match_token!($i, BAREWORD => $w)
};
}
pub fn pos<'a>(i: SliceIter<'a, Token>) -> Result<SliceIter<'a, Token>, Position> {
let mut _i = i.clone();
let tok = _i.next().unwrap();
let pos = tok.pos.clone();
Result::Complete(i, pos)
}
#[cfg(test)]
mod test;