extern crate std;
use std::error::Error as FmtError;
use std::io::Read;
use self::Error::*;
/// Failure reported by the lexer.
#[derive(Debug)]
pub enum Error {
    /// An I/O operation failed; the underlying error is kept as-is.
    IOError(std::io::Error),
    /// The input was read but is not syntactically valid.
    SyntaxError,
}
impl From<std::io::Error> for Error {
fn from(error: std::io::Error) -> Self {
IOError(error)
}
}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.description())
}
}
impl std::error::Error for Error {
fn description(&self) -> &str {
match *self {
IOError(ref err) => err.description(),
SyntaxError => "Syntax error",
}
}
}
/// Category of a lexeme produced by the lexer.
///
/// The enum carries no data, so it is cheap to copy and compare by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexemeKind {
    /// Text found between two double quotes (quotes excluded).
    String,
    /// A name starting with a letter or `_`, continued by letters, digits,
    /// `_` or `-`.
    Identifier,
    /// The `{` character.
    BlockStart,
    /// The `}` character.
    BlockEnd,
    /// The `:` character.
    PropertyEnd,
    /// The `;` character.
    StatementEnd,
    /// The `,` character.
    ListSeparator,
}
/// A single token located in the text held by a lexer.
pub struct Lexeme<'a> {
    /// Category of the token.
    kind: LexemeKind,
    /// 1-based byte position of the token's first byte in the lexed text.
    start: usize,
    /// Length of the token, in bytes.
    size: usize,
    /// Line number supplied when the lexeme was created.
    pub line: usize,
    /// Column number supplied when the lexeme was created.
    pub column: usize,
    /// Name of the file the lexeme comes from.
    pub file: &'a str,
}
impl<'a> std::fmt::Display for Lexeme<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f, "{:?} [{};{}] ({} at l{},c{})",
self.kind, self.start, self.size,
self.file, self.line, self.column)
}
}
impl<'a> Lexeme<'a> {
    /// Builds a lexeme from its kind, its span (`start`, `size`) and the
    /// location (`line`, `column`, `file`) to report for it.
    pub fn new(kind: LexemeKind, start: usize, size: usize,
               line: usize, column: usize, file: &'a str) -> Lexeme<'a> {
        Lexeme { kind, start, size, line, column, file }
    }

    /// Borrows the kind of this lexeme.
    pub fn kind_get(&self) -> &LexemeKind {
        let Lexeme { ref kind, .. } = *self;
        kind
    }
}
/// A diagnostic recorded by the lexer while scanning a file.
///
/// The derives make diagnostics printable with `{:?}`, clonable and
/// comparable, which is what callers and tests need to inspect them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexerError<'a> {
    /// Name of the file in which the problem was found.
    pub file: &'a str,
    /// Line number reported for the problem.
    pub line: usize,
    /// Column number reported for the problem.
    pub column: usize,
    /// Human-readable explanation of the problem.
    pub message: String,
}
/// Lexer state: the text that was read, the lexemes found in it and the
/// diagnostics collected along the way.
pub struct Lexer<'a> {
    /// Lexemes in the order they were emitted.
    lexemes: Vec<Lexeme<'a>>,
    /// Text read from the lexed file; lexeme spans point into it.
    contents: String,
    /// Diagnostics in the order they were recorded.
    errors: Vec<LexerError<'a>>,
}
/// Private extension trait: build a lexeme from its parts and append it to a
/// collection in a single call.
trait PushLexeme<'a> {
    /// Appends a lexeme made of the given kind, span and location.
    fn pushl(
        &mut self,
        kind: LexemeKind,
        start: usize,
        size: usize,
        line: usize,
        column: usize,
        file: &'a str,
    );
}
impl<'a> PushLexeme<'a> for Vec<Lexeme<'a>> {
fn pushl(&mut self, kind: LexemeKind, start: usize, size: usize,
line: usize, column: usize, file: &'a str) {
let lexeme = Lexeme::new(kind, start, size, line, column, file);
trace!("Pushing Lexeme: {}", lexeme);
self.push(lexeme);
}
}
/// Creates a lexer holding no text, no lexemes and no errors.
pub fn new<'a>() -> Lexer<'a> {
    Lexer {
        lexemes: Default::default(),
        contents: Default::default(),
        errors: Default::default(),
    }
}
impl<'a> Lexer<'a> {
pub fn token_extract(&self, index: usize) -> Option<&str> {
if index >= self.lexemes.len() {
None
} else {
let lexeme = &self.lexemes[index];
let contents = self.contents.as_str();
let slice = &contents[lexeme.start-1..lexeme.start+lexeme.size-1];
Some(slice)
}
}
pub fn errors_get(&self) -> &Vec<LexerError> {
&self.errors
}
pub fn tokens_get(&self) -> &Vec<Lexeme> {
&self.lexemes
}
pub fn lex(&mut self, filename: &'a std::path::Path) -> Result<(), Error> {
let mut file = try!(std::fs::File::open(filename));
let size = try!(file.read_to_string(&mut self.contents));
debug!("Lexing file {:?} of size {} bytes ...", filename, size);
enum Fsm {
Nowhere,
InString,
InIdentifier,
InComment,
}
let mut fsm = Fsm::Nowhere;
let mut start_pos = 0;
let mut single_line_comment = false;
let mut lineno = 1;
let mut start_of_line = 1;
let one = 1;
let mut iterator = self.contents.as_str().char_indices();
while let Some(iter_tup) = iterator.next() {
let pos = iter_tup.0 + 1;
let iter = iter_tup.1;
macro_rules! lex_error {
($msg:ident) => {
let error = LexerError {
line: lineno,
column: pos + 1 - start_of_line,
file: filename.to_str().unwrap(),
message: $msg,
};
self.errors.push(error);
};
}
macro_rules! lex_push {
($id:expr, $start:ident, $size:ident) => {
self.lexemes.pushl($id, $start, $size,
lineno, pos + 1 - start_of_line,
filename.to_str().unwrap());
}
}
if iter == '\n' {
lineno += 1;
start_of_line = pos + 1;
}
loop {
match fsm {
Fsm::Nowhere => {
match iter {
'"' => {
start_pos = pos + 1;
fsm = Fsm::InString;
},
'/' => {
match iterator.next() {
Some(next_tup) => {
match next_tup.1 {
'/' => {
single_line_comment = true;
fsm = Fsm::InComment;
},
'*' => {
fsm = Fsm::InComment;
},
_ => {
let msg = String::from("Unexpected '/' character");
lex_error!(msg);
}
}
},
None => {
let msg = String::from("Unexpected '/' character");
lex_error!(msg);
}
}
},
':' => {
lex_push!(LexemeKind::PropertyEnd, pos, one);
},
';' => {
lex_push!(LexemeKind::StatementEnd, pos, one);
},
',' => {
lex_push!(LexemeKind::ListSeparator, pos, one);
},
'{' => {
lex_push!(LexemeKind::BlockStart, pos, one);
},
'}' => {
lex_push!(LexemeKind::BlockEnd, pos, one);
},
'a' ... 'z' | 'A' ... 'Z' | '_' => {
start_pos = pos;
fsm = Fsm::InIdentifier;
},
' ' | '\t' | '\n' | '\r' => {
},
_ => {
let msg = format!("Syntax error. Unexpected {}.", iter);
lex_error!(msg);
}
}
},
Fsm::InIdentifier => {
match iter {
'a' ... 'z' | 'A' ... 'Z' | '0' ... '9' | '_' | '-' => {
},
'/' => {
let size = pos - start_pos;
lex_push!(LexemeKind::Identifier, start_pos, size);
match iterator.next() {
Some(next_tup) => {
match next_tup.1 {
'/' => {
single_line_comment = true;
fsm = Fsm::InComment;
},
'*' => {
fsm = Fsm::InComment;
},
_ => {
let msg = String::from("Unexpected '/' character");
lex_error!(msg);
}
}
},
None => {
let msg = String::from("Unexpected '/' character");
lex_error!(msg);
}
}
},
_ => {
let size = pos - start_pos;
lex_push!(LexemeKind::Identifier, start_pos, size);
fsm = Fsm::Nowhere;
continue;
}
}
},
Fsm::InComment => {
match iter {
'*' => {
match iterator.next() {
Some(next_tup) => {
match next_tup.1 {
'/' if ! single_line_comment => {
fsm = Fsm::Nowhere;
},
_ => {
}
}
},
None => {
let msg = String::from("Unexpected '*' character");
lex_error!(msg);
}
}
},
'\n' => {
if single_line_comment {
single_line_comment = false;
fsm = Fsm::Nowhere;
}
},
_ => {}
}
}
Fsm::InString => {
match iter {
'"' => {
let size = pos - start_pos;
lex_push!(LexemeKind::String, start_pos, size);
fsm = Fsm::Nowhere;
},
'\n' | '\r' => {
let msg = format!("Invalid character {:?} in string expression", iter);
lex_error!(msg);
},
_ => {}
}
}
}
break;
}
}
if self.errors.is_empty() {
Ok(())
} else {
Err(SyntaxError)
}
}
}
/// Fixture-driven tests: each case lexes `tests/lexer/<name>.sub` and, for
/// the valid cases, compares the dump of the lexemes with
/// `tests/lexer/<name>.txt`.
#[cfg(test)]
mod tests {
    extern crate std;
    use std::io::Read;

    /// Builds the path `tests/lexer/<name>.<ext>`.
    fn test_file_get(name: &str, ext: &str) -> std::path::PathBuf {
        let mut path = std::path::PathBuf::new();
        path.push("tests");
        path.push("lexer");
        path.push(name);
        path.set_extension(ext);
        path
    }

    /// Path of the input file of a test case.
    fn test_source_get(name: &str) -> std::path::PathBuf {
        test_file_get(name, "sub")
    }

    /// Path of the expected-output file of a test case.
    fn test_result_get(name: &str) -> std::path::PathBuf {
        test_file_get(name, "txt")
    }

    /// Lexes the input of `lexer_test` and checks the dump of its lexemes
    /// (one per line) against the expected output.
    ///
    /// Panics when lexing fails, when the expected output cannot be read, or
    /// when the two texts differ.
    fn test_run(lexer_test: &str) {
        let source = test_source_get(lexer_test);
        let mut lexer = super::new();
        if let Err(err) = lexer.lex(source.as_path()) {
            // Fail with the reason in the panic message itself, so it shows
            // up in the test report.
            panic!("*** Lexer error: {}", err);
        }

        let mut comp = String::new();
        for lexeme in lexer.tokens_get() {
            comp.push_str(&format!("{}\n", lexeme));
        }

        let result = test_result_get(lexer_test);
        let mut matcher = String::new();
        std::fs::File::open(result.as_path())
            .and_then(|mut file| file.read_to_string(&mut matcher))
            .expect("cannot read the expected lexer output");
        assert_eq!(matcher, comp);
    }

    #[test]
    fn lex_valid_file_0() {
        test_run("ok_0");
    }

    #[test]
    fn lex_valid_file_1() {
        test_run("ok_1");
    }

    #[test]
    #[should_panic]
    fn lex_invalid_file_0() {
        test_run("fail_0");
    }

    #[test]
    #[should_panic]
    fn lex_invalid_file_1() {
        test_run("fail_1");
    }

    #[test]
    #[should_panic]
    fn lex_invalid_file_2() {
        test_run("fail_2");
    }
}