rail_lang/
tokens.rs

1use regex::Regex;
2
/// One lexical unit produced from a piece of source text.
///
/// Values are normally built through the `From<String>` conversion below,
/// which classifies a raw piece of text into exactly one of these variants.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    /// The text `[`.
    LeftBracket,
    /// The text `]`.
    RightBracket,
    /// The text `true` or `false`.
    Boolean(bool),
    /// Text that parses as an `i64`.
    I64(i64),
    /// Text that does not parse as an `i64` but does parse as an `f64`.
    F64(f64),
    /// Any text not claimed by another variant.
    Term(std::string::String),
    /// Text that started with a backslash; the backslash is not stored.
    DeferredTerm(std::string::String),
    /// Text that was wrapped in double quotes; the quotes are not stored.
    String(std::string::String),
    /// A backslash followed by nothing but whitespace.
    None,
}

// Bring the variants into scope unqualified. Within this file that means
// `None` and `String` name `Token` variants, which is why the standard string
// type is always spelled `std::string::String`.
use Token::*;

impl From<std::string::String> for Token {
    /// Classifies a raw piece of text as a [`Token`].
    ///
    /// Rules are tried in this order, and the first one that applies wins:
    ///
    /// 1. `[`, `]`, `true` and `false` map to their dedicated variants.
    /// 2. Text of at least two characters that starts and ends with `"`
    ///    becomes `String`, with the surrounding quotes removed and every
    ///    backslash-`n` pair replaced by a newline character.
    /// 3. Text that parses as `i64` becomes `I64`.
    /// 4. Text that parses as `f64` becomes `F64`. The standard parser also
    ///    accepts spellings such as `inf` and `NaN`, so those become `F64`
    ///    as well.
    /// 5. Text starting with a backslash becomes `DeferredTerm` holding the
    ///    trimmed remainder, or `Token::None` when that remainder is empty.
    /// 6. Everything else becomes `Term`, holding the text unchanged.
    fn from(tok: std::string::String) -> Self {
        // Fixed spellings first.
        match tok.as_str() {
            "[" => return LeftBracket,
            "]" => return RightBracket,
            "true" => return Boolean(true),
            "false" => return Boolean(false),
            _ => {}
        }

        // Stripping one quote from each end only succeeds when the text has
        // both an opening and a distinct closing quote, so a lone `"` falls
        // through to the later rules.
        let quoted = tok
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'));
        if let Some(inner) = quoted {
            return String(inner.replace("\\n", "\n"));
        }

        // Integers take priority over floats so that `1` stays an `I64`.
        if let Ok(int) = tok.parse::<i64>() {
            return I64(int);
        }
        if let Ok(float) = tok.parse::<f64>() {
            return F64(float);
        }

        if let Some(rest) = tok.strip_prefix('\\') {
            let name = rest.trim();
            return if name.is_empty() {
                None
            } else {
                DeferredTerm(name.to_string())
            };
        }

        Term(tok)
    }
}
52
53pub fn tokenize(line: &str) -> Vec<Token> {
54    // TODO: Validate that a line does not contain unterminated strings.
55    // TODO: Handle character escapes for quotes, newlines, etc. (But here?)
56    let re: Regex = Regex::new(r#"(".*?"|\[|\]|[^\s\[\]]*)"#).unwrap();
57    let line = line.replace('\n', " ");
58    re.captures_iter(&line)
59        .flat_map(|cap| cap.iter().take(1).collect::<Vec<_>>())
60        .filter_map(|res| res.map(|mat| mat.as_str()))
61        .take_while(|s| !s.starts_with('#'))
62        .filter(|s| !s.is_empty())
63        .map(|s| s.to_owned())
64        .map(Token::from)
65        .collect()
66}
67
/// Two integers followed by a bare operator, separated by spaces.
#[test]
fn token_test() {
    let source = "1 1 +";
    let tokens = tokenize(source);

    assert_eq!(vec![I64(1), I64(1), Term("+".to_owned())], tokens);
}
75
/// Two adjacent quoted strings become two separate `String` tokens.
#[test]
fn token_test_2() {
    let source = "\"hello\" \"there\"";
    let tokens = tokenize(source);

    let want = vec![String("hello".to_owned()), String("there".to_owned())];
    assert_eq!(want, tokens);
}
83
/// A space inside quotes does not split the string.
#[test]
fn token_test_3() {
    let source = "\"hello there\"";
    let tokens = tokenize(source);

    assert_eq!(vec![String("hello there".to_owned())], tokens);
}
91
/// Leading and trailing spaces inside quotes are kept.
#[test]
fn token_test_4() {
    let source = "\" hello there \"";
    let tokens = tokenize(source);

    assert_eq!(vec![String(" hello there ".to_owned())], tokens);
}
99
/// A quoted string surrounded by integers on both sides.
#[test]
fn token_test_5() {
    let source = "1 2 \" hello three \" 4 5";
    let tokens = tokenize(source);

    let want = vec![
        I64(1),
        I64(2),
        String(" hello three ".to_owned()),
        I64(4),
        I64(5),
    ];
    assert_eq!(want, tokens);
}
113
/// A `#` inside quotes is string content; a `#` outside ends the line.
#[test]
fn token_test_6() {
    let source = "1 2 \"a # in a string is fine\" #but at the end is ignored";
    let tokens = tokenize(source);

    let want = vec![
        I64(1),
        I64(2),
        String("a # in a string is fine".to_owned()),
    ];
    assert_eq!(want, tokens);
}
121
/// Brackets separated from their contents by spaces.
#[test]
fn token_test_7() {
    let source = "1 1 [ + ] call .s";
    let tokens = tokenize(source);

    let want = vec![
        I64(1),
        I64(1),
        LeftBracket,
        Term("+".to_owned()),
        RightBracket,
        Term("call".to_owned()),
        Term(".s".to_owned()),
    ];
    assert_eq!(want, tokens);
}
137
/// Brackets written directly against their contents still split off.
#[test]
fn token_test_8() {
    let source = "1 1 [+] call .s";
    let tokens = tokenize(source);

    let want = vec![
        I64(1),
        I64(1),
        LeftBracket,
        Term("+".to_owned()),
        RightBracket,
        Term("call".to_owned()),
        Term(".s".to_owned()),
    ];
    assert_eq!(want, tokens);
}
153
/// Back-to-back bracket groups and a term with no separating whitespace.
#[test]
fn token_test_9() {
    let source = "[1 1][+]doin .s";
    let tokens = tokenize(source);

    let want = vec![
        LeftBracket,
        I64(1),
        I64(1),
        RightBracket,
        LeftBracket,
        Term("+".to_owned()),
        RightBracket,
        Term("doin".to_owned()),
        Term(".s".to_owned()),
    ];
    assert_eq!(want, tokens);
}
171
/// A backslash-prefixed word becomes a `DeferredTerm` without the backslash.
#[test]
fn token_test_10() {
    let source = "1 \\dup do .s";
    let tokens = tokenize(source);

    let want = vec![
        I64(1),
        DeferredTerm("dup".to_owned()),
        Term("do".to_owned()),
        Term(".s".to_owned()),
    ];
    assert_eq!(want, tokens);
}
184
185#[test]
186fn token_test_11() {
187    let actual = "1 \\num -> num pl";
188    let expected = vec![
189        I64(1),
190        DeferredTerm("num".into()),
191        Term("->".into()),
192        Term("num".into()),
193        Term("pl".into()),
194    ];
195
196    assert_eq!(expected, tokenize(actual));
197}