yozuk_sdk/
tokenizer.rs

1use super::tk;
2use super::token::*;
3use pest::{iterators::Pair, Parser};
4
/// Pest-generated parser for the token grammar.
///
/// `pest_derive` generates the `Rule` enum and the `TokenParser::parse`
/// implementation at compile time from the `token.pest` grammar file.
mod parser {
    #[derive(pest_derive::Parser)]
    #[grammar = "token.pest"]
    pub struct TokenParser;
}
10
11use parser::*;
12
/// Splits raw input text into [`Token`]s, honoring shell-style quoting.
///
/// Stateless; construct with [`Tokenizer::new`] or `Tokenizer::default()`.
#[derive(Default)]
pub struct Tokenizer {}
15
16impl Tokenizer {
17    pub fn new() -> Self {
18        Default::default()
19    }
20
21    pub fn tokenize(&self, input: &str) -> Vec<Token> {
22        if let Ok(args) = TokenParser::parse(Rule::args, input) {
23            args.filter_map(parse_arg).collect()
24        } else {
25            input
26                .split_whitespace()
27                .map(|s| tk!(s.to_string()))
28                .collect()
29        }
30    }
31}
32
33fn parse_arg(arg: Pair<Rule>) -> Option<Token> {
34    let (raw_str, data) = match arg.as_rule() {
35        Rule::string => (None, arg.as_str().to_string()),
36        Rule::sq_string => (
37            Some(arg.as_str().to_string()),
38            arg.into_inner()
39                .next()
40                .unwrap()
41                .as_str()
42                .replace("\\'", "'"),
43        ),
44        Rule::dq_string => (
45            Some(arg.as_str().to_string()),
46            arg.into_inner()
47                .next()
48                .unwrap()
49                .as_str()
50                .replace("\\\"", "\""),
51        ),
52        _ => return None,
53    };
54    Some(Token {
55        data: data.into(),
56        raw_str,
57        ..Default::default()
58    })
59}
60
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenize() {
        let tokenizer = Tokenizer::new();
        // Plain words: leading/trailing/repeated whitespace is dropped and
        // the apostrophe in "What's" survives as part of the word.
        assert_eq!(
            tokenizer.tokenize(" What's   the time "),
            tk!(["What's", "the", "time"])
        );
        // A double-quoted span becomes a single token: quotes stripped in
        // `data`, original quoted text preserved in `raw_str`.
        assert_eq!(
            tokenizer.tokenize(r#" "Hello world" to md5 "#),
            vec![
                Token {
                    data: "Hello world".into(),
                    raw_str: Some("\"Hello world\"".into()),
                    ..Default::default()
                },
                tk!("to"),
                tk!("md5")
            ]
        );
        // Punctuation-heavy input still splits into whitespace-separated
        // tokens; parentheses stay attached to the adjacent digits.
        assert_eq!(
            tokenizer.tokenize(r#" (1 + 1) * 2 "#),
            tk!(["(1", "+", "1)", "*", "2"])
        );
        // Escaped double quotes inside a quoted span are unescaped in
        // `data` while `raw_str` keeps the backslashes verbatim.
        assert_eq!(
            tokenizer.tokenize(r#" " \" \" " "#),
            vec![Token {
                data: " \" \" ".into(),
                raw_str: Some(r#"" \" \" ""#.into()),
                ..Default::default()
            }]
        );
        // A leading '#' is ordinary token content, not a comment marker.
        assert_eq!(tokenizer.tokenize(" #ffffff "), tk!(["#ffffff"]));
        // A newline between tokens behaves like any other whitespace.
        assert_eq!(
            tokenizer.tokenize(
                r#" "Hello world"
             to md5 "#
            ),
            vec![
                Token {
                    data: "Hello world".into(),
                    raw_str: Some("\"Hello world\"".into()),
                    ..Default::default()
                },
                tk!("to"),
                tk!("md5")
            ]
        );
    }
}
113}