/// Streaming tokenizer for EBNF grammar text.
///
/// Reads characters from any `char` iterator and produces tokens as `String`s:
/// the punctuation characters `[ ] { } ( ) | ;`, the `:=` operator,
/// identifiers, and quoted strings. A quoted string is produced as three
/// consecutive tokens: the opening quote, the contents, and the closing quote.
/// Whitespace and `#` line comments are skipped.
pub struct EbnfTokenizer<I: Iterator<Item = char>> {
    /// Remaining input, with one character of lookahead.
    input: std::iter::Peekable<I>,
    /// Tokens already scanned but not yet handed out (the tail of a quoted
    /// string: its contents and closing quote).
    buff: std::collections::VecDeque<String>,
}

impl<I: Iterator<Item = char>> EbnfTokenizer<I> {
    /// Creates a tokenizer that reads EBNF source text from `input`.
    pub fn new(input: I) -> Self {
        Self {
            input: input.peekable(),
            buff: std::collections::VecDeque::new(),
        }
    }

    /// Scans and returns the next token.
    ///
    /// Returns `Ok(Some(token))` for a token, `Ok(None)` once the input is
    /// exhausted, and `Err(message)` on malformed input: a `:` not followed by
    /// `=`, a quoted string with no closing quote, or a character that cannot
    /// start any token.
    ///
    /// Whitespace and comments are skipped in a loop rather than by
    /// recursion, so stack usage does not grow with the number of skipped
    /// lines. A comment that runs to the end of the input (no trailing
    /// newline) simply ends the token stream.
    fn next_result(&mut self) -> Result<Option<String>, String> {
        // Hand out tokens left over from a previously scanned quoted string.
        if let Some(token) = self.buff.pop_front() {
            return Ok(Some(token));
        }
        loop {
            let first = match self.input.next() {
                Some(ch) => ch,
                None => return Ok(None),
            };
            match first {
                ch if ch.is_whitespace() => continue,
                '#' => {
                    // `any` consumes up to and including the newline; if no
                    // newline is found the comment ran to the end of input.
                    if self.input.any(|ch| ch == '\n') {
                        continue;
                    }
                    return Ok(None);
                }
                ':' => {
                    return match self.input.next() {
                        Some('=') => Ok(Some(":=".to_string())),
                        _ => Err("Incomplete := operator".to_string()),
                    };
                }
                '"' | '\'' => {
                    // Buffer the follow-up tokens only after the string is
                    // known to be terminated, so an error leaves no stale
                    // tokens behind.
                    let contents = self.read_quoted(first)?;
                    self.buff.push_back(contents);
                    self.buff.push_back(first.to_string());
                    return Ok(Some(first.to_string()));
                }
                ch if "[]{}()|;".contains(ch) => return Ok(Some(ch.to_string())),
                ch if ch.is_ascii_alphabetic() || ch == '@' || ch == '_' => {
                    return Ok(Some(self.read_identifier(ch)));
                }
                ch => return Err(format!("Unexpected char: {}", ch)),
            }
        }
    }

    /// Reads the body of a quoted string whose opening quote `open` has
    /// already been consumed, up to and including the matching close quote.
    ///
    /// Backslash escapes are kept verbatim in the result (the backslash and
    /// the escaped character are both stored), and an escaped quote does not
    /// terminate the string.
    fn read_quoted(&mut self, open: char) -> Result<String, String> {
        let mut contents = String::new();
        let mut escaped = false;
        for ch in self.input.by_ref() {
            if escaped {
                contents.push('\\');
                contents.push(ch);
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == open {
                return Ok(contents);
            } else {
                contents.push(ch);
            }
        }
        Err("Unfinished string missing close quote".to_string())
    }

    /// Reads the rest of an identifier that starts with `first`, consuming
    /// ASCII alphanumerics and underscores.
    fn read_identifier(&mut self, first: char) -> String {
        let mut id = first.to_string();
        while let Some(&ch) = self.input.peek() {
            if !(ch.is_ascii_alphanumeric() || ch == '_') {
                break;
            }
            id.push(ch);
            self.input.next();
        }
        id
    }
}
impl<I> Iterator for EbnfTokenizer<I>
where
    I: Iterator<Item = char>,
{
    type Item = String;

    /// Yields the next token, or `None` when the input is exhausted.
    ///
    /// A tokenizing error is reported on standard error and also ends the
    /// iteration with `None`; callers that need to tell the two cases apart
    /// cannot do so through this interface.
    fn next(&mut self) -> Option<Self::Item> {
        self.next_result().unwrap_or_else(|message| {
            eprintln!("EbnfTokenizer error: {}", message);
            None
        })
    }
}
#[cfg(test)]
mod tests {
    use super::EbnfTokenizer;

    /// Tokenizes a small grammar covering quoted strings, identifiers,
    /// punctuation, a comment, the `:=` operator and an escaped quote.
    #[test]
    fn simple() {
        let input = r#"
            "hello" world @tag | [ foo ];
            {x} # comment
            a:=(y)
            "escapedstring\""
        "#;
        let expected = vec![
            "\"", "hello", "\"", "world", "@tag", "|", "[", "foo", "]", ";",
            "{", "x", "}", "a", ":=", "(", "y", ")", "\"", "escapedstring\\\"",
            "\"",
        ];
        // Compare the whole token stream so that a tokenizer which stops
        // early (e.g. on an error) fails the test instead of passing on a
        // matching prefix.
        let tokens: Vec<String> = EbnfTokenizer::new(input.chars()).collect();
        assert_eq!(tokens, expected);
    }
}