1use super::tk;
2use super::token::*;
3use pest::{iterators::Pair, Parser};
4
5mod parser {
6 #[derive(pest_derive::Parser)]
7 #[grammar = "token.pest"]
8 pub struct TokenParser;
9}
10
11use parser::*;
12
13#[derive(Default)]
14pub struct Tokenizer {}
15
16impl Tokenizer {
17 pub fn new() -> Self {
18 Default::default()
19 }
20
21 pub fn tokenize(&self, input: &str) -> Vec<Token> {
22 if let Ok(args) = TokenParser::parse(Rule::args, input) {
23 args.filter_map(parse_arg).collect()
24 } else {
25 input
26 .split_whitespace()
27 .map(|s| tk!(s.to_string()))
28 .collect()
29 }
30 }
31}
32
33fn parse_arg(arg: Pair<Rule>) -> Option<Token> {
34 let (raw_str, data) = match arg.as_rule() {
35 Rule::string => (None, arg.as_str().to_string()),
36 Rule::sq_string => (
37 Some(arg.as_str().to_string()),
38 arg.into_inner()
39 .next()
40 .unwrap()
41 .as_str()
42 .replace("\\'", "'"),
43 ),
44 Rule::dq_string => (
45 Some(arg.as_str().to_string()),
46 arg.into_inner()
47 .next()
48 .unwrap()
49 .as_str()
50 .replace("\\\"", "\""),
51 ),
52 _ => return None,
53 };
54 Some(Token {
55 data: data.into(),
56 raw_str,
57 ..Default::default()
58 })
59}
60
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the token expected for a quoted argument: `data` is the
    /// unescaped content and `raw` is the text as it appeared in the input.
    fn quoted(data: &'static str, raw: &'static str) -> Token {
        Token {
            data: data.into(),
            raw_str: Some(raw.into()),
            ..Default::default()
        }
    }

    /// Expected tokens for the `"Hello world" to md5` inputs.
    fn hello_world_to_md5() -> Vec<Token> {
        vec![
            quoted("Hello world", "\"Hello world\""),
            tk!("to"),
            tk!("md5"),
        ]
    }

    #[test]
    fn test_tokenize() {
        let tokenizer = Tokenizer::new();

        // Plain words; the apostrophe does not start a quoted string here.
        assert_eq!(
            tokenizer.tokenize(" What's the time "),
            tk!(["What's", "the", "time"])
        );

        // A double-quoted argument followed by plain words.
        assert_eq!(
            tokenizer.tokenize(r#" "Hello world" to md5 "#),
            hello_world_to_md5()
        );

        // Parentheses stay attached to the neighbouring characters.
        assert_eq!(
            tokenizer.tokenize(r#" (1 + 1) * 2 "#),
            tk!(["(1", "+", "1)", "*", "2"])
        );

        // Escaped double quotes inside a double-quoted argument.
        assert_eq!(
            tokenizer.tokenize(r#" " \" \" " "#),
            vec![quoted(" \" \" ", r#"" \" \" ""#)]
        );

        assert_eq!(tokenizer.tokenize(" #ffffff "), tk!(["#ffffff"]));

        // Same as the second case, but with a line break inside the input.
        assert_eq!(
            tokenizer.tokenize(
                r#" "Hello world"
 to md5 "#
            ),
            hello_world_to_md5()
        );
    }
}