pub use self::CatTokenKind::*;
use moore_common::source::*;
/// A tokenizer that groups a stream of `(position, char)` pairs into coarse
/// `CatToken`s. It keeps a two-character lookahead window so that
/// two-character openers such as `//` and `/*` can be recognized.
pub struct Cat<'a> {
/// The underlying source of `(position, char)` pairs.
iter: Box<CharIter<'a>>,
/// Upper bound of the iterator's `size_hint` taken at construction; used as
/// the position of any lookahead slot that lies past the end of the input.
last: usize,
/// The current character and the one following it; `None` once the input
/// is exhausted.
chars: (Option<char>, Option<char>),
/// Positions belonging to the two entries of `chars`, with `last`
/// substituted for entries that are `None`.
indices: (usize, usize),
}
impl<'a> Cat<'a> {
    /// Creates a tokenizer reading from `iter` and primes the two-character
    /// lookahead window with the first two items.
    ///
    /// The upper bound of `iter.size_hint()` is recorded as the end position;
    /// it stands in for the position of any lookahead slot past the end of
    /// the input.
    ///
    /// # Panics
    ///
    /// Panics if `iter.size_hint()` does not report an upper bound.
    pub fn new(mut iter: Box<CharIter<'a>>) -> Cat<'a> {
        let (_, upper) = iter.size_hint();
        let last = upper.expect("Iterator must provide upper bounds");
        let (first_char, first_pos) = Self::unpack(iter.next(), last);
        let (second_char, second_pos) = Self::unpack(iter.next(), last);
        Cat {
            iter,
            last,
            chars: (first_char, second_char),
            indices: (first_pos, second_pos),
        }
    }

    /// Shifts the lookahead window forward by one character: the second slot
    /// becomes the first, and the second is refilled from the iterator.
    fn bump(&mut self) {
        let (incoming_char, incoming_pos) = Self::unpack(self.iter.next(), self.last);
        self.chars = (self.chars.1, incoming_char);
        self.indices = (self.indices.1, incoming_pos);
    }

    /// Splits an iterator item into its character and position, using
    /// `fallback` as the position when the iterator is exhausted.
    fn unpack(entry: Option<(usize, char)>, fallback: usize) -> (Option<char>, usize) {
        match entry {
            Some((pos, ch)) => (Some(ch), pos),
            None => (None, fallback),
        }
    }
}
impl<'a> Iterator for Cat<'a> {
type Item = CatToken;
fn next(&mut self) -> Option<Self::Item> {
match self.chars {
(None, _) => None,
(Some('\n'), _) => {
let t = CatToken(Newline, self.indices.0, self.indices.1);
self.bump();
Some(t)
}
(Some(c), _) if is_whitespace(c) => {
let p0 = self.indices.0;
while let (Some(c), _) = self.chars {
if !is_whitespace(c) {
break;
}
self.bump();
}
Some(CatToken(Whitespace, p0, self.indices.0))
}
(Some('/'), Some('/')) => {
let p0 = self.indices.0;
while let (Some(c), _) = self.chars {
if c == '\n' {
break;
}
self.bump();
}
Some(CatToken(Comment, p0, self.indices.0))
}
(Some('/'), Some('*')) => {
let p0 = self.indices.0;
while let (Some(c0), Some(c1)) = self.chars {
if c0 == '*' && c1 == '/' {
self.bump();
self.bump();
break;
}
self.bump();
}
Some(CatToken(Comment, p0, self.indices.0))
}
(Some(c), _) if is_symbol(c) => {
let t = CatToken(Symbol(c), self.indices.0, self.indices.1);
self.bump();
Some(t)
}
(Some(c), _) if is_digit(c) => {
let p0 = self.indices.0;
while let (Some(c), _) = self.chars {
if !is_digit(c) {
break;
}
self.bump();
}
Some(CatToken(Digits, p0, self.indices.0))
}
(Some(_), _) => {
let p0 = self.indices.0;
while let (Some(c), _) = self.chars {
if c == '\n' || is_whitespace(c) || is_symbol(c) {
break;
}
self.bump();
}
Some(CatToken(Text, p0, self.indices.0))
}
}
}
}
/// Returns `true` for the characters treated as horizontal whitespace: space,
/// tab, carriage return and the no-break space U+00A0. A line feed is
/// deliberately not included.
fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\r' | '\u{A0}' => true,
        _ => false,
    }
}
/// Returns `true` if `c` is one of the ASCII decimal digits `0` through `9`.
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Returns `true` if `c` is one of the punctuation characters that the
/// tokenizer emits as a stand-alone symbol.
fn is_symbol(c: char) -> bool {
    // Every character that counts as a symbol, in one place.
    const SYMBOLS: &str = "()[]{}#:;.,=+-*/~|<>!%^&?'\"`$\\@";
    SYMBOLS.contains(c)
}
/// A token produced by `Cat`: its kind, followed by the start position and
/// the exclusive end position of the input span it covers. Positions are the
/// ones reported by the character iterator handed to `Cat::new`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct CatToken(pub CatTokenKind, pub usize, pub usize);
/// The coarse categories into which `Cat` sorts its input.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum CatTokenKind {
/// A single line feed character (`'\n'`).
Newline,
/// A run of characters accepted by `is_whitespace`.
Whitespace,
/// A `//` line comment or a `/* ... */` block comment.
Comment,
/// A single character accepted by `is_symbol`, carried as the payload.
Symbol(char),
/// A run of characters that are neither newline, whitespace nor symbol and
/// that does not start with a digit.
Text,
/// A run of characters accepted by `is_digit`.
Digits,
/// NOTE(review): never produced by `Cat` in this file; presumably emitted
/// by a consumer to mark the end of the input — confirm against callers.
Eof,
}
// Unit tests for the `Cat` tokenizer.
#[cfg(test)]
mod tests {
use super::*;
// Runs the tokenizer over `input` and collects every token it yields.
fn lex(input: &str) -> Vec<CatToken> {
Cat::new(Box::new(input.char_indices())).collect()
}
// Empty input must produce no tokens at all.
#[test]
fn empty() {
assert_eq!(lex(""), vec![]);
}
// Mixed text, whitespace, digits and a trailing newline. The expected
// positions are byte offsets as produced by `char_indices`: `ö` and `é`
// occupy two bytes each and each CJK character three, which is why the
// token ends are 5, 12 and 25 rather than character counts.
#[test]
fn non_empty() {
assert_eq!(
lex("Löwe 老虎 1234Léopard\n"),
vec![
CatToken(Text, 0, 5),
CatToken(Whitespace, 5, 6),
CatToken(Text, 6, 12),
CatToken(Whitespace, 12, 13),
CatToken(Digits, 13, 17),
CatToken(Text, 17, 25),
CatToken(Newline, 25, 26),
]
);
}
}