1use rust_forth_tokenizer::ForthToken;
2use rust_forth_tokenizer::ForthTokenizer;
3pub use rust_simple_stack_processor::GasLimit;
4use rust_simple_stack_processor::Opcode;
5use rust_simple_stack_processor::StackMachine;
6
7mod error;
8
9pub use error::ForthError;
10use std::collections::HashMap;
11use std::convert::TryFrom;
12use std::convert::TryInto;
13
14#[cfg(test)]
15mod tests;
16
/// Builds a `std::collections::HashMap` from `key => value` pairs.
///
/// Pairs are separated by commas and inserted in the order written, so a
/// repeated key keeps the value written last. A trailing comma after the
/// final pair is accepted, and an empty invocation yields an empty map.
macro_rules! hashmap {
    ($( $key: expr => $val: expr ),* $(,)?) => {{
        let mut map = ::std::collections::HashMap::new();
        $( map.insert($key, $val); )*
        map
    }}
}
25
/// Compiles Forth source text into opcodes and runs them on an embedded
/// stack machine.
pub struct ForthCompiler {
    /// Stack machine whose opcode memory receives the compiled code and which
    /// executes it.
    pub sm: StackMachine,
    /// Built-in words, each mapped to the fixed opcode sequence emitted
    /// wherever the word is used.
    intrinsic_words: HashMap<&'static str, Vec<Opcode>>,
    /// User-defined words mapped to the opcode index at which their compiled
    /// body starts. Public only when the `enable_reflection` feature is on.
    #[cfg(feature = "enable_reflection")]
    pub word_addresses: HashMap<String, usize>,
    /// User-defined words mapped to the opcode index at which their compiled
    /// body starts.
    #[cfg(not(feature = "enable_reflection"))]
    word_addresses: HashMap<String, usize>,
    /// Opcode index just past the most recently compiled word; the next word,
    /// and any top-level code to execute, is placed starting here.
    last_function: usize,
    /// `Debug` rendering of the token list each user-defined word was
    /// compiled from (only with the `enable_reflection` feature).
    #[cfg(feature = "enable_reflection")]
    pub word_definitions: HashMap<String, String>,
    /// Compiled opcodes of each user-defined word, including the trailing
    /// `RET` (only with the `enable_reflection` feature).
    #[cfg(feature = "enable_reflection")]
    pub word_opcodes: HashMap<String, Vec<Opcode>>,
}
48
49impl Default for ForthCompiler {
50 fn default() -> ForthCompiler {
51 ForthCompiler {
52 sm: StackMachine::default(),
53 intrinsic_words: hashmap![
54 "SWAP" => vec![Opcode::SWAP],
55 "NOT" => vec![Opcode::NOT],
56 "ADD" => vec![Opcode::ADD],
57 "SUB" => vec![Opcode::SUB],
58 "MUL" => vec![Opcode::MUL],
59 "DIV" => vec![Opcode::DIV],
60 "DUP" => vec![Opcode::DUP],
61 "2DUP" => vec![Opcode::DUP2],
62 "TRAP" => vec![Opcode::TRAP],
63 "DROP" => vec![Opcode::DROP],
64 "2DROP" => vec![Opcode::DROP,Opcode::DROP],
65 "2OVER" => vec![Opcode::OVER2],
66 "2SWAP" => vec![Opcode::SWAP2],
67 "1+" => vec![Opcode::LDI(1),Opcode::ADD],
68 "1-" => vec![Opcode::LDI(-1),Opcode::ADD],
69 "2+" => vec![Opcode::LDI(2),Opcode::ADD],
70 "2-" => vec![Opcode::LDI(-2),Opcode::ADD],
71 "2*" => vec![Opcode::LDI(2),Opcode::MUL],
72 "2/" => vec![Opcode::LDI(2),Opcode::DIV],
73 "I" => vec![Opcode::GETLP],
74 "J" => vec![Opcode::GETLP2],
75 "AND" => vec![Opcode::AND],
76 "=" => vec![Opcode::SUB,Opcode::CMPZ],
77 "<>" => vec![Opcode::SUB,Opcode::CMPNZ]
78 ],
79 word_addresses: HashMap::new(),
80 last_function: 0,
81 #[cfg(feature = "enable_reflection")]
82 word_definitions: HashMap::new(),
83 #[cfg(feature = "enable_reflection")]
84 word_opcodes: HashMap::new(),
85 }
86 }
87}
88
/// Bookkeeping for an `IF` whose closing `THEN` has not been compiled yet.
#[derive(Debug)]
struct DeferredIfStatement {
    /// Index of the placeholder `LDI` emitted for the `IF`.
    if_location: usize,
    /// Index of the placeholder `LDI` emitted for the `ELSE`, once one is seen.
    else_location: Option<usize>,
}

impl DeferredIfStatement {
    /// Starts tracking an `IF` located at `if_location`; no `ELSE` is known yet.
    pub fn new(if_location: usize) -> Self {
        let else_location = None;
        Self {
            if_location,
            else_location,
        }
    }
}
104
/// Bookkeeping for a `DO` loop whose `LOOP` / `+LOOP` has not been compiled yet.
#[derive(Debug)]
struct DeferredDoLoopStatement {
    /// Index of the first opcode emitted for the `DO` itself.
    _prelude_start: usize,
    /// Index of the first opcode of the loop body; the closing word jumps back here.
    logical_start: usize,
}

impl DeferredDoLoopStatement {
    /// Records where the loop prelude and the loop body begin.
    pub fn new(_prelude_start: usize, logical_start: usize) -> Self {
        Self {
            logical_start,
            _prelude_start,
        }
    }
}
120
121#[derive(Debug)]
122struct LoopExits {
123 loop_exit_locations: Vec<usize>,
124}
125
126impl LoopExits {
127 pub fn new() -> LoopExits {
128 LoopExits {
129 loop_exit_locations: Vec::new(),
130 }
131 }
132
133 pub fn add_exit_point(&mut self, loop_exit_location: usize) {
134 self.loop_exit_locations.push(loop_exit_location);
135 }
136
137 fn fixup_loop_exits(&self, opcode_vector: &mut Vec<Opcode>) {
138 let loop_exit_point = opcode_vector.len();
139 for leave_point in self.loop_exit_locations.iter() {
140 let jump_forward =
141 i64::try_from(loop_exit_point).unwrap() - i64::try_from(*leave_point).unwrap() - 1;
142 opcode_vector[*leave_point] = Opcode::LDI(jump_forward);
143 }
144 }
145}
146
/// Bookkeeping for a `BEGIN` loop whose closing word has not been compiled yet.
#[derive(Debug)]
struct DeferredBeginLoopStatement {
    /// Index of the first opcode of the loop body; the closing word jumps back here.
    logical_start: usize,
}

impl DeferredBeginLoopStatement {
    /// Records where the loop body begins.
    pub fn new(logical_start: usize) -> Self {
        Self { logical_start }
    }
}
157
/// A control structure that has been opened during compilation but not yet
/// closed by its terminating word.
enum DeferredStatement {
    /// An open `IF`, together with the `ELSE` position if one has been seen.
    If(DeferredIfStatement),
    /// An open `DO` loop and the early-exit jumps recorded inside it.
    DoLoop(DeferredDoLoopStatement, LoopExits),
    /// An open `BEGIN` loop and the early-exit jumps recorded inside it.
    BeginLoop(DeferredBeginLoopStatement, LoopExits),
}
163
164impl ForthCompiler {
165 fn compile_tokens_compile_and_remove_word_definitions(
166 &mut self,
167 token_source: &ForthTokenizer,
168 ) -> Result<Vec<Opcode>, ForthError> {
169 let mut tvi = Vec::new();
171
172 let mut iter = token_source.into_iter();
174 while let Some(token) = iter.next() {
175 match token {
176 ForthToken::Colon => {
178 if let Some(ForthToken::Command(word_name)) = iter.next() {
180 let mut tvc = Vec::new();
182 let mut found_semicolon = false;
183 while let Some(token) = iter.next() {
185 match token {
186 ForthToken::SemiColon => {
187 self.compile_tokens_as_word(word_name, &tvc)?;
189 found_semicolon = true;
190 break;
191 }
192 _ => tvc.push(token),
193 }
194 }
195 if !found_semicolon {
196 return Err(ForthError::MissingSemicolonAfterColon);
197 }
198 } else {
199 return Err(ForthError::MissingCommandAfterColon);
201 }
202 }
203 ForthToken::SemiColon => {
204 return Err(ForthError::SemicolonBeforeColon);
205 }
206 _ => {
207 tvi.push(token);
208 }
209 }
210 }
211
212 let mut compiled_tokens = self.compile_token_vector(&tvi)?;
213
214 compiled_tokens.push(Opcode::RET);
216
217 Ok(compiled_tokens)
218 }
219
220 fn compile_tokens_as_word(
221 &mut self,
222 word_name: &str,
223 tokens: &[ForthToken],
224 ) -> Result<(), ForthError> {
225 self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
228
229 let mut compiled = self.compile_token_vector(tokens)?;
231 compiled.push(Opcode::RET);
233 let function_start = self.last_function;
235 self.last_function += compiled.len();
237 #[cfg(feature = "enable_reflection")]
239 self.word_opcodes
240 .insert(word_name.to_owned(), compiled.clone());
241 self.sm.st.opcodes.append(&mut compiled);
243 self.word_addresses
245 .insert(word_name.to_owned(), function_start);
246 #[cfg(feature = "enable_reflection")]
250 self.word_definitions
251 .insert(word_name.to_owned(), format!("{:?}", tokens));
252 Ok(())
253 }
254
255 fn compile_token_vector(
256 &mut self,
257 token_vector: &[ForthToken],
258 ) -> Result<Vec<Opcode>, ForthError> {
259 let mut deferred_statements = Vec::new();
261 let mut tv: Vec<Opcode> = Vec::new();
263
264 for t in token_vector.iter() {
266 match t {
267 ForthToken::DropLineComment(_) => (),
268 ForthToken::ParenthesizedRemark(_) => (),
269 ForthToken::StringCommand(_, _) => (),
270 ForthToken::Number(n) => {
271 tv.push(Opcode::LDI(*n));
273 }
274 ForthToken::Command(s) => {
275 let current_instruction = tv.len();
277
278 match s.as_ref() {
279 "DO" => {
280 let start_of_loop_code = current_instruction;
281 tv.push(Opcode::PUSHLP);
283 let logical_start_of_loop = tv.len();
284 deferred_statements.push(DeferredStatement::DoLoop(
285 DeferredDoLoopStatement::new(
286 start_of_loop_code,
287 logical_start_of_loop,
288 ),
289 LoopExits::new(),
290 ));
291 }
292 "LOOP" => {
293 if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
294 deferred_statements.pop()
295 {
296 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
297 - i64::try_from(current_instruction).unwrap()
298 - 3;
300 tv.push(Opcode::INCLP);
301 tv.push(Opcode::CMPLOOP);
302 tv.push(Opcode::LDI(jump_back));
303 tv.push(Opcode::JRZ);
304
305 loop_exits.fixup_loop_exits(&mut tv);
306 } else {
307 return Err(ForthError::InvalidSyntax(
308 "LOOP without proper loop start like DO".to_owned(),
309 ));
310 }
311 tv.push(Opcode::DROPLP);
312 }
313 "+LOOP" => {
314 if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
315 deferred_statements.pop()
316 {
317 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
318 - i64::try_from(current_instruction).unwrap()
319 - 3;
321 tv.push(Opcode::ADDLP);
322 tv.push(Opcode::CMPLOOP);
323 tv.push(Opcode::LDI(jump_back));
324 tv.push(Opcode::JRZ);
325
326 loop_exits.fixup_loop_exits(&mut tv);
327 } else {
328 return Err(ForthError::InvalidSyntax(
329 "+LOOP without proper loop start like DO".to_owned(),
330 ));
331 }
332 tv.push(Opcode::DROPLP);
333 }
334 "LEAVE" => {
335 let most_recent_loop_statement =
336 deferred_statements.iter_mut().rev().find(|x| match **x {
337 DeferredStatement::If(_) => false,
338 DeferredStatement::DoLoop(_, _) => true,
339 DeferredStatement::BeginLoop(_, _) => true,
340 });
341 if let Some(deferred_statement) = most_recent_loop_statement {
342 let loop_exits =
343 match deferred_statement {
344 DeferredStatement::DoLoop(_, loop_exits) => loop_exits,
345 DeferredStatement::BeginLoop(_, loop_exits) => loop_exits,
346 _ => return Err(ForthError::InvalidSyntax(
347 "LEAVE without proper loop start like DO or BEGIN(1)"
348 .to_owned(),
349 )),
350 };
351 loop_exits.add_exit_point(current_instruction);
353
354 tv.push(Opcode::LDI(0));
356 tv.push(Opcode::JR);
357 } else {
358 return Err(ForthError::InvalidSyntax(
359 "LEAVE without proper loop start like DO or BEGIN(2)"
360 .to_owned(),
361 ));
362 }
363 }
364 "BEGIN" => {
365 deferred_statements.push(DeferredStatement::BeginLoop(
366 DeferredBeginLoopStatement::new(current_instruction),
367 LoopExits::new(),
368 ));
369 }
370 "UNTIL" => {
371 if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
372 deferred_statements.pop()
373 {
374 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
375 - i64::try_from(current_instruction).unwrap()
376 - 1;
378 tv.push(Opcode::LDI(jump_back));
379 tv.push(Opcode::JRZ);
380
381 loop_exits.fixup_loop_exits(&mut tv);
382 } else {
383 return Err(ForthError::InvalidSyntax(
384 "UNTIL without proper loop start like BEGIN".to_owned(),
385 ));
386 }
387 }
388 "WHILE" => {
389 if let Some(DeferredStatement::BeginLoop(_loop_def, loop_exits)) =
390 deferred_statements.last_mut()
391 {
392 loop_exits.add_exit_point(current_instruction);
393 tv.push(Opcode::LDI(0));
395 tv.push(Opcode::JRZ);
396 } else {
397 return Err(ForthError::InvalidSyntax(
398 "WHILE without proper loop start like BEGIN".to_owned(),
399 ));
400 }
401 }
402 "REPEAT" => {
403 if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
404 deferred_statements.pop()
405 {
406 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
407 - i64::try_from(current_instruction).unwrap()
408 - 1;
410 tv.push(Opcode::LDI(jump_back));
411 tv.push(Opcode::JR);
412
413 loop_exits.fixup_loop_exits(&mut tv);
414 } else {
415 return Err(ForthError::InvalidSyntax(
416 "AGAIN without proper loop start like BEGIN".to_owned(),
417 ));
418 }
419 }
420 "AGAIN" => {
421 if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
422 deferred_statements.pop()
423 {
424 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
425 - i64::try_from(current_instruction).unwrap()
426 - 1;
428 tv.push(Opcode::LDI(jump_back));
429 tv.push(Opcode::JR);
430
431 loop_exits.fixup_loop_exits(&mut tv);
432 } else {
433 return Err(ForthError::InvalidSyntax(
434 "AGAIN without proper loop start like BEGIN".to_owned(),
435 ));
436 }
437 }
438 "IF" => {
440 deferred_statements.push(DeferredStatement::If(
441 DeferredIfStatement::new(current_instruction),
442 ));
443 tv.push(Opcode::LDI(0));
445 tv.push(Opcode::JRZ);
446 }
447 "ELSE" => {
448 if let Some(DeferredStatement::If(x)) = deferred_statements.last_mut() {
449 x.else_location = Some(current_instruction);
450 tv.push(Opcode::LDI(0));
452 tv.push(Opcode::JR);
453 } else {
454 return Err(ForthError::InvalidSyntax(
455 "ELSE without IF".to_owned(),
456 ));
457 }
458 }
459 "THEN" => {
460 if let Some(DeferredStatement::If(x)) = deferred_statements.pop() {
463 let if_jump_location = x.if_location;
465 let if_jump_offset = match x.else_location {
466 None => (current_instruction as u64
467 - (x.if_location + 1) as u64)
468 .try_into()
469 .unwrap(),
470 Some(el) => (current_instruction as u64 - el as u64 + 1)
471 .try_into()
472 .unwrap(),
473 };
474 let (else_jump_location, else_jump_offset): (
475 Option<usize>,
476 Option<i64>,
477 ) = match x.else_location {
478 Some(x) => (
479 Some(x),
480 Some(
481 i64::try_from(
482 current_instruction as u64 - (x + 1) as u64,
483 )
484 .unwrap(),
485 ),
486 ),
487 None => (None, None),
488 };
489 tv[if_jump_location] = Opcode::LDI(if_jump_offset);
491 if let (Some(location), Some(offset)) =
492 (else_jump_location, else_jump_offset)
493 {
494 tv[location] = Opcode::LDI(offset);
495 }
496 } else {
497 return Err(ForthError::InvalidSyntax(
498 "THEN without IF".to_owned(),
499 ));
500 }
501 }
502 _ => {
503 if let Some(offset) = self.word_addresses.get(*s) {
504 tv.push(Opcode::LDI(*offset as i64));
505 tv.push(Opcode::CALL);
506 } else if let Some(ol) = self.intrinsic_words.get::<str>(s) {
507 tv.append(&mut ol.clone());
508 } else {
509 return Err(ForthError::UnknownToken(s.to_string()));
510 }
511 }
512 }
513 }
514 ForthToken::Colon => {
515 panic!("Colon should never reach this function");
516 }
517 ForthToken::SemiColon => {
518 panic!("SemiColon should never reach this function");
519 }
520 }
521 }
522
523 Ok(tv)
524 }
525
526 fn execute_tokens(
527 &mut self,
528 token_source: &ForthTokenizer,
529 gas_limit: GasLimit,
530 ) -> Result<(), ForthError> {
531 let mut ol = self.compile_tokens_compile_and_remove_word_definitions(token_source)?;
532 self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
534 self.sm.st.opcodes.append(&mut ol);
535 self.sm.execute(self.last_function, gas_limit)?;
536 Ok(())
540 }
541
542 pub fn execute_string(&mut self, s: &str, gas_limit: GasLimit) -> Result<(), ForthError> {
543 let tokenizer = ForthTokenizer::new(&s);
544 self.execute_tokens(&tokenizer, gas_limit)?;
545 Ok(())
546 }
547}