// expresso/tokenizer.rs

2use crate::scanner::Scanner;
3use crate::operator::{Operator,OPERATORS};
4
5#[derive(Debug,PartialEq,Clone)]
6pub enum Token {
7	Literal( String ),
8	OperandI32( i32 ),
9	OperandF32( f32 ),
10	StringLiteral( String ),
11	Variable( String ),
12	Operator( Operator ),
13	BraceLeft,
14	BraceRight,
15	Whitespace,
16	EOF,
17	ERROR( String ),
18
19	// :HACK: clean me up
20	FunctionCall,
21}
22
23#[derive(Debug)]
24pub struct Tokenizer<'a> {
25	scanner: Scanner<'a>,
26}
27
28impl<'a> Tokenizer<'a> {
29	pub fn new( scanner: Scanner<'a> ) -> Self {
30		Self {
31			scanner,
32		}
33	}
34
35	pub fn empty( &self ) -> bool {
36		self.scanner.empty()
37	}
38
39	fn as_digit( s: &str ) -> Option< i32 > {
40		match s {
41			"0" => Some( 0 ),
42			"1" => Some( 1 ),
43			"2" => Some( 2 ),
44			"3" => Some( 3 ),
45			"4" => Some( 4 ),
46			"5" => Some( 5 ),
47			"6" => Some( 6 ),
48			"7" => Some( 7 ),
49			"8" => Some( 8 ),
50			"9" => Some( 9 ),
51			_ => None,
52		}
53	}
54
55	fn is_allowed_in_literal( s: &str ) -> bool {
56		if Tokenizer::is_alphanumeric( s ) {
57			true
58		} else {
59			["_"].contains( &s )
60		}
61	}
62	fn is_alphabetic( s: &str ) -> bool {
63		let mut chars = s.chars();
64		if let Some( c ) = chars.next() {
65			if c.is_ascii_alphabetic() {
66				true
67			} else {
68				false
69			}
70		} else {
71			false
72		}
73	}
74
75	fn is_alphanumeric( s: &str ) -> bool {
76		let mut chars = s.chars();
77		if let Some( c ) = chars.next() {
78			if c.is_ascii_alphanumeric() {
79				true
80			} else {
81				false
82			}
83		} else {
84			false
85		}
86	}
87
88	fn is_whitespace( s: &str ) -> bool {
89		match s {
90			" " => true,
91			_ => false,
92		}
93	}
94
95	// :HACK: !!!!!!!!!!!!!!!!!
96	fn next_operator( &mut self ) -> Option< Operator > { // :HACK: resolve operator handling next time
97		let s = self.scanner.peek();	// :HACK: and more hacking, we need multi character operators
98		for o in OPERATORS.iter() {
99			if o.literal == s {
100				self.scanner.pop();
101				return Some( o.clone() )
102			}
103		}
104		None
105	}
106
107	fn next_i32( &mut self ) -> Option< i32 > {
108		let mut c = self.scanner.peek();
109		let mut v = 0;
110		let mut is_valid = false;
111
112		while let Some( d ) = Tokenizer::as_digit( c ) {
113			v = v * 10 + d;
114			is_valid = true;
115			self.scanner.pop();
116			c = self.scanner.peek();
117		};
118
119		if is_valid {
120			Some( v )
121		} else {
122			None
123		}
124	}
125
126	fn get_number_of_digits( n: i32 ) -> i32 {
127		if n > 999_999_999 {
128			10
129		} else if n > 99_999_999 {
130			9
131		} else if n > 9_999_999 {
132			8
133		} else if n > 999_999 {
134			7
135		} else if n > 99_999 {
136			6
137		} else if n > 9_999 {
138			5
139		} else if n > 999 {
140			4
141		} else if n > 99 {
142			3
143		} else if n > 9 {
144			2
145		} else {
146			1
147		}
148	}
149
150	fn next_number( &mut self ) -> Option< Token > {
151		if let Some( i ) = self.next_i32() {
152			if "." == self.scanner.peek() {
153				self.scanner.pop();
154				if let Some( j ) = self.next_i32() {
155					// :HACK: but we don't want any dependencies
156//					dbg!(i, j);
157					let f = i as f32;
158					let n = Tokenizer::get_number_of_digits( j );
159					let shift = 10_f32.powf( n as f32 );
160//					dbg!( &n, &shift );
161					let f = f + ( j as f32 / shift );
162					Some( Token::OperandF32( f ) )
163				} else {
164					// dot but no decimal part
165					Some( Token::ERROR( "malformed float".to_string() ) )					
166				}
167			} else {
168				Some( Token::OperandI32( i ) )
169			}
170		} else {
171			None
172		}
173	}
174
175	fn next_whitespace( &mut self ) -> bool {
176		let mut had_whitespace = false;
177		while Tokenizer::is_whitespace( self.scanner.peek() ) {
178			had_whitespace = true;
179			self.scanner.pop();
180		}
181		had_whitespace
182	}
183
184	fn next_literal( &mut self ) -> Option< Token > {
185		let c = self.scanner.peek();
186		if Tokenizer::is_alphabetic( c ) {
187			let mut value = c.to_string();
188			self.scanner.pop();
189
190			let mut c = self.scanner.peek();
191			while Tokenizer::is_allowed_in_literal( c ) {
192				value = value + c;
193				self.scanner.pop();
194				c = self.scanner.peek();
195			};
196
197			Some( Token::Literal( value ) )
198
199		} else {
200			None
201		}
202	}
203
204	fn next_variable( &mut self ) -> Option< Token > {
205		if self.scanner.peek() == "$" {
206			self.scanner.pop();
207
208			let mut name = String::new();
209
210
211			let mut c = self.scanner.peek();
212			while Tokenizer::is_alphanumeric( c ) || c == "_" {	// :TODO: allow more characters in variable names
213				name = name + c;
214				self.scanner.pop();
215				c = self.scanner.peek();
216			};
217
218			if name.len() > 0 {
219				Some( Token::Variable( name ) )
220			} else {
221				Some( Token::ERROR( format!("Missing variable name. $ followed by {}", c).to_string() ) )
222			}
223		} else {
224			None
225		}
226	}
227
228	fn next_brace( &mut self ) -> Option< Token > {
229		let c = self.scanner.peek();
230		match c {
231			"(" => {
232				self.scanner.pop();
233				Some( Token::BraceLeft )
234			},
235			")" => {
236				self.scanner.pop();
237				Some( Token::BraceRight )
238			},
239			_ => None,
240		}
241	}
242
243	fn next_string_literal( &mut self ) -> Option< Token > {
244		if self.scanner.peek() == "\"" {
245			self.scanner.pop();
246
247			let mut value = String::new();
248
249			let mut c = self.scanner.peek();
250			while c != "" {
251//				dbg!(&self.scanner);
252				if c != "\"" {
253					value = value + c;
254					self.scanner.pop();
255				} else {			// closing quote found		
256					self.scanner.pop();
257					return Some( Token::StringLiteral( value ) )
258				}
259				c = self.scanner.peek();
260			};
261			Some( Token::ERROR( "Unterminated string found".to_string() ) )
262		} else {
263			None
264		}
265	}
266
267	pub fn next( &mut self ) -> Token {
268		if self.empty() {
269			Token::EOF
270		} else if self.next_whitespace() {
271			Token::Whitespace
272		} else if let Some( l ) = self.next_literal() {
273			l
274		} else if let Some( v ) = self.next_variable() {
275			v
276		} else if let Some( s ) = self.next_string_literal() {
277			s
278		} else if let Some( o ) = self.next_brace() {
279			o
280		} else if let Some( o ) = self.next_operator() {
281			Token::Operator( o )
282		} else if let Some( n ) = self.next_number() {
283			n
284		} else {
285			// :TODO:
286			let mut e = String::new();
287			e += "Parsing failed at:'";
288			println!();
289			while !self.scanner.empty() {
290				e += self.scanner.peek();
291				self.scanner.pop();
292			}
293			e += "'";
294			Token::ERROR( format!( "unhandled token. {}", e ) )
295		}
296	}
297
298	pub fn scanner( &self ) -> &'a Scanner {
299		&self.scanner
300	}
301}