use errors::Error;
use parser::Time;
/// A single lexical unit produced by `tokenize`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
/// A number: exactly two ASCII digits that were followed by whitespace or
/// by the end of the input.
Number(u32),
/// A bare word, or the contents of a `"`-quoted string without the
/// surrounding quotes.
String(String),
/// An eight-character field that parsed successfully as a `Time`.
Time(Time),
}
/// Character-level cursor over the text being tokenized.
struct Reader {
/// Every `char` of the source text, collected up front by `Reader::new`.
chars: Vec<char>,
/// Index into `chars` of the next character to be read.
position: usize,
}
/// The ten ASCII decimal digits; `Reader::try_take_number` uses this table to
/// decide whether a character may be part of a number.
const DIGITS: [char; 10] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
/// Reports whether `c` separates tokens.
///
/// Besides everything `char::is_whitespace` accepts, U+FEFF (the byte order
/// mark) is treated as whitespace as well.
fn is_whitespace(c: char) -> bool {
    match c {
        '\u{feff}' => true,
        other => other.is_whitespace(),
    }
}
impl Reader {
fn new(source: &str) -> Self {
Reader {
chars: source.chars().collect(),
position: 0,
}
}
fn available(&self) -> bool {
self.chars.len() > self.position
}
fn peek(&self, n: usize) -> Result<String, Error> {
if self.position + n <= self.chars.len() {
Ok(self.chars[self.position..self.position + n]
.into_iter()
.collect())
} else {
Err("Tried to read out of bounds of reader.".into())
}
}
fn take(&mut self, n: usize) -> Result<String, Error> {
self.peek(n).map(|s| {
self.position += n;
s
})
}
fn try_take_time(&mut self) -> Option<Time> {
self.peek(8).ok().and_then(|s| s.parse().ok()).map(|time| {
self.position += 8;
time
})
}
fn try_take_number(&mut self) -> Option<u32> {
let s = match self.peek(2) {
Ok(s) => s,
Err(_) => return None,
};
if s.chars()
.map(|c| DIGITS.contains(&c))
.fold(true, |old, new| old && new)
{
if let Ok(s3) = self.peek(3) {
if !is_whitespace(s3.chars().nth(2).unwrap()) {
return None;
}
}
self.position += 3;
Some(s.parse().unwrap())
} else {
None
}
}
fn take_string(&mut self) -> Result<String, Error> {
let mut result = Vec::new();
let first = self.take(1)?.chars().next().unwrap();
let is_quoted = first == '"';
if !is_quoted {
result.push(first);
}
while let Ok(next) = self.take(1) {
let next = next.chars().next().unwrap();
if next == '"' {
if is_quoted {
return Ok(result.into_iter().collect());
} else {
return Err("The `\"` char is not allowed in strings.".into());
}
} else if !is_quoted && is_whitespace(next) {
break;
} else {
result.push(next);
}
}
if is_quoted {
Err("Opened string not closed until EOF.".into())
} else {
Ok(result.into_iter().collect())
}
}
fn try_skip_whitespace(&mut self) {
while let Ok(next) = self.peek(1) {
let next = next.chars().next().unwrap();
if is_whitespace(next) {
self.position += 1;
} else {
return;
}
}
}
}
/// Splits `source` into a sequence of [`Token`]s.
///
/// Whitespace between tokens is discarded. At each token boundary the reader
/// is asked, in this order, for a time, then a number, and finally a string;
/// the first reading that succeeds becomes the token.
///
/// # Errors
///
/// Propagates the error from `Reader::take_string` when the remaining input
/// cannot be read as a string.
pub fn tokenize(source: &str) -> Result<Vec<Token>, Error> {
    let mut reader = Reader::new(source);
    let mut tokens = Vec::new();
    loop {
        reader.try_skip_whitespace();
        if !reader.available() {
            return Ok(tokens);
        }
        let token = match reader.try_take_time() {
            Some(time) => Token::Time(time),
            None => match reader.try_take_number() {
                Some(number) => Token::Number(number),
                None => Token::String(reader.take_string()?),
            },
        };
        tokens.push(token);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the string token expected in the assertions below.
    fn string_token(text: &str) -> Token {
        Token::String(text.to_string())
    }

    /// A full `hh:mm:ss` field is read as a time; shorter input is rejected.
    #[test]
    fn try_take_time() {
        let mut complete = Reader::new("10:11:12");
        assert_eq!(complete.try_take_time(), Some(Time::new(10, 11, 12)));
        for &input in &["10", " "] {
            assert_eq!(Reader::new(input).try_take_time(), None);
        }
    }

    /// Two digits form a number; letters and blanks do not.
    #[test]
    fn try_take_number() {
        assert_eq!(Reader::new("12").try_take_number(), Some(12));
        for &input in &["xyz", " "] {
            assert_eq!(Reader::new(input).try_take_number(), None);
        }
    }

    /// A word that merely begins with digits must fall through to a string.
    #[test]
    fn string_starting_with_num() {
        let mut reader = Reader::new("860B640B");
        assert_eq!(reader.try_take_number(), None);
        assert_eq!(reader.take_string().unwrap(), "860B640B".to_string());
    }

    /// Bare and quoted strings both yield their contents without quotes.
    #[test]
    fn take_string() {
        let mut bare = Reader::new("abc");
        assert_eq!(bare.take_string().unwrap(), "abc".to_string());
        let mut quoted = Reader::new("\"abc\"");
        assert_eq!(quoted.take_string().unwrap(), "abc".to_string());
    }

    /// One token of each kind, separated by single spaces.
    #[test]
    fn basic_types() {
        let source = r#"ABC 12 10:10:30 Abc"#;
        let tokens = tokenize(source).unwrap();
        println!("{:?}", tokens);
        let expected = vec![
            string_token("ABC"),
            Token::Number(12),
            Token::Time(Time::new(10, 10, 30)),
            string_token("Abc"),
        ];
        assert_eq!(tokens, expected);
    }

    /// Quoted strings keep inner whitespace and number/time look-alikes.
    #[test]
    fn test_strings() {
        let source = r#"ABC "xyz xyz 12 10:10:30" " abc ""#;
        let tokens = tokenize(source).unwrap();
        println!("{:?}", tokens);
        let expected = vec![
            string_token("ABC"),
            string_token("xyz xyz 12 10:10:30"),
            string_token(" abc "),
        ];
        assert_eq!(tokens, expected);
    }
}