/// A single lexical unit of the script language.
///
/// Keyword variants carry no payload; identifier and literal variants own
/// the text or value they were built from.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The `READ` keyword.
    Read,
    /// The `WRITE` keyword.
    Write,
    /// The `PRINT` keyword.
    Print,
    /// The `APPEND` keyword.
    Append,
    /// A bare word such as a variable or file name (e.g. `input.txt`).
    Identifier(String),
    /// The contents of a double-quoted string, with escapes already resolved.
    StringLiteral(String),
    /// A whole-number literal.
    IntLiteral(i64),
    /// A literal containing a decimal point.
    FloatLiteral(f64),
    /// The `->` symbol.
    Arrow,
    /// End of line (a `\n` in the input).
    Eol,
}
14
15pub fn tokenize(source: &str) -> Result<Vec<Token>, String> {
16 let mut tokens = Vec::new();
17 let mut chars = source.chars().peekable();
18
19 while let Some(ch) = chars.next() {
20 let token = match ch {
21 ' ' | '\t' | '\r' => continue,
22 '\n' => {
23 tokens.push(Token::Eol);
24 continue;
25 }
26 '#' => {
27 while let Some(c) = chars.peek() {
28 if *c == '\n' {
29 break;
30 }
31 chars.next();
32 }
33 continue;
34 }
35 '-' => {
36 let next_char = chars.next();
37 if next_char == Some('>') {
38 Token::Arrow
39 } else {
40 return Err(format!("Unexpected character: '{:?}'", next_char));
41 }
42 }
43 '"' => {
44 let mut s = String::new();
45 while let Some(c) = chars.next() {
46 match c {
47 '\\' => {
48 if let Some(next_char) = chars.next() {
49 match next_char {
50 'n' => s.push('\n'),
51 't' => s.push('\t'),
52 'r' => s.push('\r'),
53 '"' => s.push('"'),
54 '\\' => s.push('\\'),
55 _ => {
56 return Err(format!(
57 "Invalid escape sequence: \\{}",
58 next_char
59 ))
60 }
61 }
62 } else {
63 return Err("Unexpected end of input.".to_string());
64 }
65 }
66 '"' => break,
67 _ => s.push(c),
68 }
69 }
70 tokens.push(Token::StringLiteral(s));
71 continue;
72 }
73 '.' => {
74 if let Some(&ch) = chars.peek() {
75 if ch.is_numeric() {
76 let mut number = String::new();
77 number.push(ch);
78 chars.next();
79
80 while let Some(ch) = chars.peek() {
81 if ch.is_numeric() {
82 number.push(*ch);
83 chars.next();
84 } else {
85 break;
86 }
87 }
88
89 match number.parse::<f64>() {
90 Ok(n) => Token::FloatLiteral(n),
91 Err(_) => return Err(format!("Invalid number: {}", number)),
92 }
93 } else {
94 Token::Identifier(".".to_string())
95 }
96 } else {
97 Token::Identifier(".".to_string())
98 }
99 }
100 '0'..='9' => {
101 let mut number = String::new();
102 number.push(ch);
103
104 while let Some(c) = chars.peek() {
105 if c.is_numeric() {
106 number.push(*c);
107 chars.next();
108 } else {
109 break;
110 }
111 }
112
113 let mut is_float = false;
114 if let Some('.') = chars.peek() {
115 is_float = true;
116 number.push('.');
117 chars.next();
118
119 while let Some(c) = chars.peek() {
120 if c.is_numeric() {
121 number.push(*c);
122 chars.next();
123 } else {
124 break;
125 }
126 }
127 }
128
129 if is_float {
130 tokens.push(Token::FloatLiteral(number.parse::<f64>().unwrap()));
131 } else {
132 tokens.push(Token::IntLiteral(number.parse::<i64>().unwrap()));
133 }
134 continue;
135 }
136 'a'..='z' | 'A'..='Z' | '_' => {
137 let mut identifier = String::new();
138 identifier.push(ch);
139
140 while let Some(c) = chars.peek() {
141 if c.is_alphanumeric() || c == &'_' || c == &'.' {
142 identifier.push(*c);
143 chars.next();
144 } else {
145 break;
146 }
147 }
148 match identifier.as_str() {
149 "READ" => Token::Read,
150 "WRITE" => Token::Write,
151 "PRINT" => Token::Print,
152 "APPEND" => Token::Append,
153 _ => Token::Identifier(identifier),
154 }
155 }
156 _ => return Err(format!("Unexpected character: '{}'.", ch)),
157 };
158 tokens.push(token);
159 }
160 Ok(tokens)
161}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes one statement per keyword and checks the exact token stream,
    /// including the `Eol` emitted for every line break.
    #[test]
    fn test_lexer_basic() {
        // Small constructors to keep the expected list readable.
        let ident = |name: &str| Token::Identifier(name.to_string());
        let text = |value: &str| Token::StringLiteral(value.to_string());

        let program = r#"READ input.txt -> content
WRITE output.txt "Hello, World!"
PRINT "Hello, World!"
APPEND var1 var2 -> result
"#;

        let expected = vec![
            // READ input.txt -> content
            Token::Read,
            ident("input.txt"),
            Token::Arrow,
            ident("content"),
            Token::Eol,
            // WRITE output.txt "Hello, World!"
            Token::Write,
            ident("output.txt"),
            text("Hello, World!"),
            Token::Eol,
            // PRINT "Hello, World!"
            Token::Print,
            text("Hello, World!"),
            Token::Eol,
            // APPEND var1 var2 -> result
            Token::Append,
            ident("var1"),
            ident("var2"),
            Token::Arrow,
            ident("result"),
            Token::Eol,
        ];

        let tokens = tokenize(program).unwrap();
        assert_eq!(tokens, expected);
    }
}