#![warn(clippy::all)]
#![warn(missing_docs)]
#![warn(missing_doc_code_examples)]
use strcursor::StrCursor;
/// A raw token sliced out of the input text by [`Pretokenizer`].
///
/// A pretoken borrows its text from the input string; it carries no
/// classification, only the text and where it was found.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Pretoken<'a> {
    /// The token text, borrowed from the input string.
    pub s: &'a str,
    /// Value of the tokenizer's line counter (which starts at 1) at the
    /// moment the token was emitted. For a quoted token containing newlines
    /// this is the line on which the token ends, not the one it starts on.
    pub line: usize,
    /// Byte position of the start of the token within the input string.
    pub offset: usize,
}
impl<'a> Pretoken<'a> {
pub fn new(
start: StrCursor<'a>,
end: StrCursor<'a>, line: usize,
offset: usize) -> Pretoken<'a> {
Pretoken{ s:start.slice_between(end).unwrap(), line, offset}
}
}
/// Splits an input string into [`Pretoken`]s, skipping whitespace and
/// `//` / `/* */` comments.
///
/// The pretokenizer is consumed through its `Iterator` implementation.
#[derive(Debug, Clone)]
pub struct Pretokenizer<'a> {
    /// Cursor marking where the next scan resumes (or, while a token is
    /// being scanned, where that token starts).
    pos: StrCursor<'a>,
    /// Running line counter; starts at 1 and is bumped for each newline
    /// the scanner consumes.
    line: usize,
}
impl<'a> Pretokenizer<'a> {
    /// Creates a pretokenizer positioned at the start of `s`, with the line
    /// counter set to 1.
    pub fn new(s: &'a str) -> Self {
        let pos = StrCursor::new_at_start(s);
        Self { pos, line: 1 }
    }

    /// Emits the token spanning from the current position up to `end` and
    /// moves the current position to `end`.
    ///
    /// Returns `None`, leaving the position untouched, when `end` equals the
    /// current position (the span would be empty). The token is stamped with
    /// the current line counter and the byte position of its start.
    fn make_pretok(&mut self, end: StrCursor<'a>) -> Option<Pretoken<'a>> {
        let start = self.pos;
        if end == start {
            None
        } else {
            self.pos = end;
            Some(Pretoken::new(start, end, self.line, start.byte_pos()))
        }
    }
}
impl <'a> std::iter::Iterator for Pretokenizer<'a> {
type Item = Pretoken<'a>;
fn next(&mut self) -> Option<Self::Item> {
#[derive(Debug)]
enum STATE {
WS,
MaybeComment,
LineComment,
BlockComment,
MaybeBlockCommentDone,
StartTok,
NormalTok,
QuotedTok,
EscapeChar,
};
let mut state = STATE::WS;
let mut curs = self.pos;
loop {
let copt = curs.cp_after();
if copt.is_none() {
match state {
STATE::NormalTok => {
return self.make_pretok(curs);
}
STATE::BlockComment => {
}
STATE::QuotedTok | STATE::EscapeChar => {
return self.make_pretok(curs);
}
_ => {}
}
self.pos = curs; return None;
}
let c = copt.unwrap();
match state {
STATE::WS => {
match c {
'\n' => {
self.line += 1;
curs.seek_next_cp();
}
' ' | '\t' => {
curs.seek_next_cp();
}
'/' => {
state = STATE::MaybeComment;
curs.seek_next_cp();
}
_ => state = STATE::StartTok,
}
}
STATE::MaybeComment => {
match c {
'/' => {
state = STATE::LineComment;
curs.seek_next_cp();
}
'*' => {
state = STATE::BlockComment;
curs.seek_next_cp();
}
_ => state = STATE::StartTok,
}
}
STATE::LineComment => {
if c == '\n' {
state = STATE::WS;
} else {
curs.seek_next_cp();
}
}
STATE::BlockComment => {
match c {
'*' => {
state = STATE::MaybeBlockCommentDone;
}
'\n' => {
self.line += 1;
}
_ => {}
}
curs.seek_next_cp();
}
STATE::MaybeBlockCommentDone => {
match c {
'/' => {
state = STATE::WS;
}
'\n' => {
self.line += 1;
state = STATE::BlockComment;
}
_ => { state = STATE::BlockComment; }
}
curs.seek_next_cp();
}
STATE::StartTok => {
self.pos = curs;
if c == '"' {
state = STATE::QuotedTok;
curs.seek_next_cp();
} else {
state = STATE::NormalTok;
curs.seek_next_cp();
}
}
STATE::NormalTok => {
match c {
' ' | '\t' => {
return self.make_pretok(curs);
}
'\n' => {
return self.make_pretok(curs);
}
'"' => {
return self.make_pretok(curs);
}
'/' => {
let mut temp = curs;
temp.seek_next_cp(); let temp_copt = temp.cp_after();
if temp_copt.is_none() {
return self.make_pretok(temp);
} else {
match temp_copt.unwrap() {
'/' | '*' => {
return self.make_pretok(curs);
}
_ => {
curs.seek_next_cp();
}
}
}
}
_ => { curs.seek_next_cp(); }
}
}
STATE::QuotedTok => {
match c {
'\n' => {
self.line +=1;
}
'"' => {
curs.seek_next_cp();
return self.make_pretok(curs);
}
'\\' => {
state = STATE::EscapeChar;
}
_ => { }
}
curs.seek_next_cp();
}
STATE::EscapeChar => {
if c == '\n' {
self.line +=1;
}
state = STATE::QuotedTok;
curs.seek_next_cp();
}
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls the next pretoken from `pt` and checks its text, line and
    /// byte offset.
    fn expect_tok(pt: &mut Pretokenizer, s: &str, line: usize, offset: usize) {
        let t = pt.next();
        assert!(t.is_some());
        let t = t.unwrap();
        assert_eq!(t.offset, offset);
        assert_eq!(t.line, line);
        assert_eq!(t.s, s);
    }

    /// Checks the first pretoken produced for `input`.
    fn expect_first(input: &str, s: &str, line: usize, offset: usize) {
        let mut pt = Pretokenizer::new(input);
        expect_tok(&mut pt, s, line, offset);
    }

    /// Checks that `input` produces no pretoken at all.
    fn expect_none(input: &str) {
        let mut pt = Pretokenizer::new(input);
        assert!(pt.next().is_none());
    }

    #[test]
    fn pretokenizer_test_0() {
        expect_none("");
    }

    #[test]
    fn pretokenizer_test_1() {
        expect_first("foo", "foo", 1, 0);
    }

    #[test]
    fn pretokenizer_test_2() {
        expect_first("foo\n", "foo", 1, 0);
    }

    #[test]
    fn pretokenizer_test_3() {
        expect_first("\nfoo", "foo", 2, 1);
    }

    #[test]
    fn pretokenizer_test_4() {
        expect_first("\nfoo\n", "foo", 2, 1);
    }

    #[test]
    fn pretokenizer_test_5() {
        expect_first("/* */foo", "foo", 1, 5);
    }

    #[test]
    fn pretokenizer_test_6() {
        expect_first("\n/* */foo", "foo", 2, 6);
    }

    #[test]
    fn pretokenizer_test_7() {
        expect_first("\n/* */\nfoo", "foo", 3, 7);
    }

    #[test]
    fn pretokenizer_test_8() {
        expect_none("// bar");
    }

    #[test]
    fn pretokenizer_test_9() {
        expect_none("\n// bar");
    }

    #[test]
    fn pretokenizer_test_10() {
        expect_none("// bar\n");
    }

    #[test]
    fn pretokenizer_test_11() {
        expect_first("// bar\nfoo", "foo", 2, 7);
    }

    #[test]
    fn pretokenizer_test_12() {
        expect_first("// bar\n\nfoo", "foo", 3, 8);
    }

    #[test]
    fn pretokenizer_test_13() {
        // An unterminated quote is returned as-is.
        expect_first("\"", "\"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_14() {
        expect_first("\"\"", "\"\"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_15() {
        expect_first("\"x\"", "\"x\"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_16() {
        expect_first("\" x\"", "\" x\"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_17() {
        expect_first("\" x x \"", "\" x x \"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_18() {
        // A quote inside a line comment is just comment text.
        expect_none("//\" x x \"");
    }

    #[test]
    fn pretokenizer_test_19() {
        // Comment markers inside a quoted token are just token text.
        expect_first("\"// x x \"", "\"// x x \"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_20() {
        expect_first("\" /* x x */ \"", "\" /* x x */ \"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_21() {
        // An escaped quote does not close the token.
        expect_first("\" \\\" x \"", "\" \\\" x \"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_22() {
        expect_first("\" \\", "\" \\", 1, 0);
    }

    #[test]
    fn pretokenizer_test_23() {
        expect_first("\" \\\"", "\" \\\"", 1, 0);
    }

    #[test]
    fn pretokenizer_test_24() {
        // The reported line is the one on which the quoted token ends.
        expect_first("\" x\nx\"", "\" x\nx\"", 2, 0);
    }

    #[test]
    fn pretokenizer_test_25() {
        expect_first("x//x", "x", 1, 0);
    }

    #[test]
    fn pretokenizer_test_26() {
        expect_first("x/*x*/", "x", 1, 0);
    }

    #[test]
    fn pretokenizer_test_27() {
        let mut pt = Pretokenizer::new("x/*y*/z");
        expect_tok(&mut pt, "x", 1, 0);
        expect_tok(&mut pt, "z", 1, 6);
    }

    #[test]
    fn pretokenizer_test_28() {
        let mut pt = Pretokenizer::new("x y z");
        expect_tok(&mut pt, "x", 1, 0);
        expect_tok(&mut pt, "y", 1, 2);
        expect_tok(&mut pt, "z", 1, 4);
    }

    #[test]
    fn pretokenizer_test_29() {
        // The runs of leading spaces are deliberate: the asserted byte
        // offsets (2, 5, 10) depend on them.
        let mut pt = Pretokenizer::new("  x\n y\n   z");
        expect_tok(&mut pt, "x", 1, 2);
        expect_tok(&mut pt, "y", 2, 5);
        expect_tok(&mut pt, "z", 3, 10);
    }

    #[test]
    fn pretokenizer_test_30() {
        // The runs of leading spaces are deliberate: the asserted byte
        // offsets (2, 11, 16) depend on them.
        let mut pt = Pretokenizer::new("  x // foo\ny\n   z");
        expect_tok(&mut pt, "x", 1, 2);
        expect_tok(&mut pt, "y", 2, 11);
        expect_tok(&mut pt, "z", 3, 16);
    }

    #[test]
    fn pretokenizer_test_31() {
        // Smoke test: iterating with `for` must run to completion.
        let pt = Pretokenizer::new("a+b c// stuff\nd");
        for tok in pt {
            println!("{} found on line {}, offset {}",
                     tok.s, tok.line, tok.offset);
        }
    }
}