use super::token::Token;
/// Stateless lexer for FASTA-formatted text.
///
/// The type carries no data; lexing is exposed through the associated
/// function `FastaLexer::lex`.
#[derive(Clone, Debug)]
pub struct FastaLexer;
impl FastaLexer {
    /// Splits FASTA text into a flat list of `(Token, String)` pairs.
    ///
    /// The input is scanned character by character with a small state machine:
    ///
    /// * Everything before the first `>` (or outside any record) is ignored.
    /// * `>` starts a new record from any state; pending text is flushed first.
    /// * The identifier (`Token::Id`) runs up to the first whitespace character.
    ///   If that character is a newline the sequence follows directly, otherwise
    ///   the rest of the header line is collected as `Token::Metadata`.
    /// * Sequence lines are concatenated into a single `Token::Sequence` with
    ///   all whitespace removed.
    /// * `;` starts a comment that runs to the end of the line. Comment text is
    ///   discarded and never emitted as a token. A comment on a header line
    ///   also terminates that header at the newline.
    ///
    /// Note: any whitespace after the identifier other than `\n` (including the
    /// `\r` of a CRLF line ending) switches to metadata, so such headers yield a
    /// `Token::Metadata` entry even when the description is empty.
    ///
    /// Returns the tokens in input order; the result is empty for empty input.
    pub fn lex(str: &str) -> Vec<(Token, String)> {
        /// Token kind that owns the pending buffer: while a comment is being
        /// skipped, that is the state the comment interrupted.
        fn owner_of(curr: Token, resume: Token) -> Token {
            if curr == Token::Comment {
                resume
            } else {
                curr
            }
        }

        let mut tokens = Vec::new();
        let mut buf = String::new();
        // State to return to once the current comment ends. Only written when
        // a comment is entered from a non-comment state, so it is never
        // `Token::Comment` itself.
        let mut resume = Token::None;
        let mut curr = Token::None;

        for c in str.chars() {
            match (curr, c) {
                // A new record header: flush whatever was being accumulated,
                // labelled with the kind that actually collected it.
                (_, '>') => {
                    if !buf.is_empty() {
                        tokens.push((owner_of(curr, resume), std::mem::take(&mut buf)));
                    }
                    curr = Token::Id;
                }
                // First whitespace ends the identifier (emitted even if empty).
                (Token::Id, c) if c.is_whitespace() => {
                    tokens.push((Token::Id, std::mem::take(&mut buf)));
                    curr = if c == '\n' {
                        Token::Sequence
                    } else {
                        Token::Metadata
                    };
                }
                // End of the header line ends the description.
                (Token::Metadata, '\n') => {
                    tokens.push((Token::Metadata, std::mem::take(&mut buf)));
                    curr = Token::Sequence;
                }
                // A `;` inside a comment is ordinary comment text; it must not
                // overwrite the state to resume afterwards.
                (Token::Comment, ';') => (),
                (_, ';') => {
                    resume = curr;
                    curr = Token::Comment;
                }
                // The newline that ends a comment also ends the header line the
                // comment was written on, if any.
                (Token::Comment, '\n') => {
                    curr = match resume {
                        Token::Id | Token::Metadata => {
                            tokens.push((resume, std::mem::take(&mut buf)));
                            Token::Sequence
                        }
                        other => other,
                    };
                }
                // Text outside any record and comment text are discarded.
                (Token::None, _) | (Token::Comment, _) => (),
                (Token::Sequence, c) => {
                    if !c.is_whitespace() {
                        buf.push(c);
                    }
                }
                // Identifier and description characters are kept verbatim.
                (_, c) => buf.push(c),
            }
        }

        // Flush the trailing token (typically the last record's sequence).
        if !buf.is_empty() {
            tokens.push((owner_of(curr, resume), buf));
        }
        tokens
    }
}