asn1rs 0.1.10 - Docs.rs

#[derive(Debug, PartialOrd, PartialEq)]
pub enum Token {
    Text(String),
    Separator(char),
}

impl Token {
    fn append(self, other: Token) -> (Token, Option<Token>) {
        match (self, other) {
            (Token::Text(mut text), Token::Text(other)) => (
                Token::Text({
                    text.push_str(&other);
                    text
                }),
                None,
            ),
            (a, b) => (a, Some(b)),
        }
    }

    pub fn text(&self) -> Option<&String> {
        match self {
            Token::Text(text) => Some(text),
            _ => None,
        }
    }

    pub fn separator(&self) -> Option<char> {
        match self {
            Token::Separator(char) => Some(*char),
            _ => None,
        }
    }
}

#[derive(Default)]
pub struct Tokenizer;

impl Tokenizer {
    pub fn parse(&self, asn: &str) -> Vec<Token> {
        let iter = asn.chars();
        let mut previous = None;
        let mut tokens = Vec::new();

        for char in iter {
            let mut token = None;
            match char {
                ':' | ';' | '=' | '(' | ')' | '{' | '}' | '.' | ',' => {
                    token = Some(Token::Separator(char))
                }
                c if !c.is_control() && c != ' ' => {
                    token = Some(Token::Text(format!("{}", c)));
                }
                ' ' | '\r' | '\n' | '\t' => {
                    if let Some(token) = previous.take() {
                        tokens.push(token);
                    }
                }
                c => eprintln!(
                    "Ignoring unexpected character: {}-0x{:02x}-{:03}",
                    c, c as u8, c as u8
                ),
            }

            if let Some(token) = token.take() {
                previous = match previous {
                    None => Some(token),
                    Some(current) => {
                        let (token, second) = current.append(token);
                        match second {
                            None => Some(token),
                            Some(next) => {
                                tokens.push(token);
                                Some(next)
                            }
                        }
                    }
                }
            }
        }

        if let Some(token) = previous {
            tokens.push(token);
        }
        tokens
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    pub fn test_separator_tokens_not_merged() {
        let result = Tokenizer.parse(":;=(){}.,");
        assert_eq!(
            result,
            vec![
                Token::Separator(':'),
                Token::Separator(';'),
                Token::Separator('='),
                Token::Separator('('),
                Token::Separator(')'),
                Token::Separator('{'),
                Token::Separator('}'),
                Token::Separator('.'),
                Token::Separator(','),
            ]
        )
    }

    #[test]
    pub fn test_text_between_seapators_is_represented_as_one_text_token() {
        let result = Tokenizer.parse("::=ASN{");
        assert_eq!(
            result,
            vec![
                Token::Separator(':'),
                Token::Separator(':'),
                Token::Separator('='),
                Token::Text("ASN".to_string()),
                Token::Separator('{'),
            ]
        )
    }

    #[test]
    pub fn test_invisible_separator_characters() {
        let result = Tokenizer.parse("a b\rc\nd\te AB\rCD\nEF\tGH aa  bb\r\rcc\n\ndd\t\tee");
        assert_eq!(
            result,
            vec![
                Token::Text("a".to_string()),
                Token::Text("b".to_string()),
                Token::Text("c".to_string()),
                Token::Text("d".to_string()),
                Token::Text("e".to_string()),
                Token::Text("AB".to_string()),
                Token::Text("CD".to_string()),
                Token::Text("EF".to_string()),
                Token::Text("GH".to_string()),
                Token::Text("aa".to_string()),
                Token::Text("bb".to_string()),
                Token::Text("cc".to_string()),
                Token::Text("dd".to_string()),
                Token::Text("ee".to_string()),
            ]
        )
    }

    #[test]
    pub fn test_token_text() {
        let token = Token::Text("some text".to_string());
        assert_eq!(token.text(), Some(&"some text".to_string()));
        assert_eq!(token.separator(), None);
    }

    #[test]
    pub fn test_token_separator() {
        let result = Tokenizer.parse("AS\x00N");
        assert_eq!(result, vec![Token::Text("ASN".to_string())])
    }

    #[test]
    pub fn test_control_char_is_ignored() {
        let token = Token::Separator(':');
        assert_eq!(token.text(), None);
        assert_eq!(token.separator(), Some(':'),)
    }
}