rust_forth_compiler/lib.rs
1use rust_forth_tokenizer::{ForthToken, ForthTokenizer};
2pub use rust_simple_stack_processor::GasLimit;
3use rust_simple_stack_processor::{Opcode, StackMachine};
4use std::collections::HashMap;
5use std::convert::{TryFrom, TryInto};
6
7mod error;
8pub use error::ForthError;
9
10#[cfg(test)]
11mod tests;
12
13pub struct ForthCompiler {
14 // This is the Stack Machine processor that runs the compiled Forth instructions
15 pub sm: StackMachine,
16 // These are the words that we know how to work with regardless, things like DROP, MUL, etc
17 intrinsic_words: HashMap<&'static str, Vec<Opcode>>,
18 // This is where we remember where we put compiled words in the *memory* of the StackMachine
19 // We run the interactive opcodes after these compiled words, and then erase the memory after
20 // the compiled words again for the next batch of interactive opcodes.
21 #[cfg(feature = "enable_reflection")]
22 pub word_addresses: HashMap<String, usize>,
23 #[cfg(not(feature = "enable_reflection"))]
24 word_addresses: HashMap<String, usize>,
25 // This is the location in memory that points to the location after the last compiled opcode
26 // So its an ideal place to run interactive compiled opcodes
27 last_function: usize,
28 // Remember the definition for words
29 #[cfg(feature = "enable_reflection")]
30 pub word_definitions: HashMap<String, String>,
31 // Remember the opcodes for words
32 #[cfg(feature = "enable_reflection")]
33 pub word_opcodes: HashMap<String, Vec<Opcode>>,
34}
35
36impl Default for ForthCompiler {
37 fn default() -> ForthCompiler {
38 ForthCompiler {
39 sm: StackMachine::default(),
40 intrinsic_words: HashMap::from([
41 ("SWAP", vec![Opcode::SWAP]),
42 ("NOT", vec![Opcode::NOT]),
43 ("ADD", vec![Opcode::ADD]),
44 ("SUB", vec![Opcode::SUB]),
45 ("MUL", vec![Opcode::MUL]),
46 ("DIV", vec![Opcode::DIV]),
47 ("DUP", vec![Opcode::DUP]),
48 ("2DUP", vec![Opcode::DUP2]),
49 ("TRAP", vec![Opcode::TRAP]),
50 ("DROP", vec![Opcode::DROP]),
51 ("2DROP", vec![Opcode::DROP, Opcode::DROP]),
52 ("2OVER", vec![Opcode::OVER2]),
53 ("2SWAP", vec![Opcode::SWAP2]),
54 ("1+", vec![Opcode::LDI(1), Opcode::ADD]),
55 ("1-", vec![Opcode::LDI(-1), Opcode::ADD]),
56 ("2+", vec![Opcode::LDI(2), Opcode::ADD]),
57 ("2-", vec![Opcode::LDI(-2), Opcode::ADD]),
58 ("2*", vec![Opcode::LDI(2), Opcode::MUL]),
59 ("2/", vec![Opcode::LDI(2), Opcode::DIV]),
60 ("I", vec![Opcode::GETLP]),
61 ("J", vec![Opcode::GETLP2]),
62 ("AND", vec![Opcode::AND]),
63 ("=", vec![Opcode::SUB, Opcode::CMPZ]),
64 ("<>", vec![Opcode::SUB, Opcode::CMPNZ]),
65 ]),
66 word_addresses: HashMap::new(),
67 last_function: 0,
68 #[cfg(feature = "enable_reflection")]
69 word_definitions: HashMap::new(),
70 #[cfg(feature = "enable_reflection")]
71 word_opcodes: HashMap::new(),
72 }
73 }
74}
75
/// Bookkeeping for a Forth IF statement whose jumps cannot be resolved until THEN is seen.
#[derive(Debug)]
struct DeferredIfStatement {
    /// Index recorded when the IF word was compiled.
    if_location: usize,
    /// Index recorded when the ELSE word was compiled, if the statement has one.
    else_location: Option<usize>,
}

impl DeferredIfStatement {
    /// Starts tracking an IF statement at `if_location`; no ELSE has been seen yet.
    pub fn new(if_location: usize) -> Self {
        DeferredIfStatement {
            else_location: None,
            if_location,
        }
    }
}
91
/// Bookkeeping for a Forth DO loop that is waiting for its closing loop word.
#[derive(Debug)]
struct DeferredDoLoopStatement {
    /// Index where the loop prelude starts; only used for debugging the internal code.
    _prelude_start: usize,
    /// Index that the end of the loop jumps back to.
    logical_start: usize,
}

impl DeferredDoLoopStatement {
    /// Records where the loop prelude starts and where the loop body logically starts.
    pub fn new(_prelude_start: usize, logical_start: usize) -> Self {
        DeferredDoLoopStatement {
            logical_start,
            _prelude_start,
        }
    }
}
107
108#[derive(Debug)]
109struct LoopExits {
110 loop_exit_locations: Vec<usize>,
111}
112
113impl LoopExits {
114 pub fn new() -> Self {
115 Self {
116 loop_exit_locations: Vec::new(),
117 }
118 }
119
120 pub fn add_exit_point(&mut self, loop_exit_location: usize) {
121 self.loop_exit_locations.push(loop_exit_location);
122 }
123
124 fn fixup_loop_exits(&self, opcode_vector: &mut Vec<Opcode>) {
125 let loop_exit_point = opcode_vector.len();
126 for leave_point in self.loop_exit_locations.iter() {
127 let jump_forward =
128 i64::try_from(loop_exit_point).unwrap() - i64::try_from(*leave_point).unwrap() - 1;
129 opcode_vector[*leave_point] = Opcode::LDI(jump_forward);
130 }
131 }
132}
133
/// Bookkeeping for a Forth BEGIN loop that is waiting for its closing loop word.
#[derive(Debug)]
struct DeferredBeginLoopStatement {
    /// Index that the end of the loop jumps back to.
    logical_start: usize,
}

impl DeferredBeginLoopStatement {
    /// Starts tracking a BEGIN loop whose body starts at `logical_start`.
    pub fn new(logical_start: usize) -> Self {
        DeferredBeginLoopStatement { logical_start }
    }
}
144
145enum DeferredStatement {
146 If(DeferredIfStatement),
147 DoLoop(DeferredDoLoopStatement, LoopExits),
148 BeginLoop(DeferredBeginLoopStatement, LoopExits),
149}
150
151impl ForthCompiler {
152 fn compile_tokens_compile_and_remove_word_definitions(
153 &mut self,
154 token_source: &ForthTokenizer,
155 ) -> Result<Vec<Opcode>, ForthError> {
156 // This is the interactive compiled token list
157 let mut tvi = Vec::new();
158
159 // Because we consume tokens in an inner loop, we can't use the normal for loop to read the tokens
160 let mut iter = token_source.into_iter();
161 while let Some(token) = iter.next() {
162 match token {
163 // If a colon token, then compile the word definition
164 ForthToken::Colon => {
165 // Get the next token which has to be a command token, or its an error, this token will be the name to compile to
166 if let Some(ForthToken::Command(word_name)) = iter.next() {
167 // This is the list of tokens we will be compiling
168 let mut tvc = Vec::new();
169 let mut found_semicolon = false;
170 // Because this is an inner loop using the outer iterator, we can't use the normal for loop syntax
171 while let Some(token) = iter.next() {
172 match token {
173 ForthToken::SemiColon => {
174 // We have found the end of the word definition, so compile to opcodes and put into memory...
175 self.compile_tokens_as_word(word_name, &tvc)?;
176 found_semicolon = true;
177 break;
178 }
179 _ => tvc.push(token),
180 }
181 }
182 if !found_semicolon {
183 return Err(ForthError::MissingSemicolonAfterColon);
184 }
185 } else {
186 // The command token has to be right after the colon token, we don't permit things like comments, we could though...
187 return Err(ForthError::MissingCommandAfterColon);
188 }
189 }
190 ForthToken::SemiColon => {
191 return Err(ForthError::SemicolonBeforeColon);
192 }
193 _ => {
194 tvi.push(token);
195 }
196 }
197 }
198
199 let mut compiled_tokens = self.compile_token_vector(&tvi)?;
200
201 // We need to return after running the interactive opcodes, so put the return in now
202 compiled_tokens.push(Opcode::RET);
203
204 Ok(compiled_tokens)
205 }
206
207 fn compile_tokens_as_word(
208 &mut self,
209 word_name: &str,
210 tokens: &[ForthToken],
211 ) -> Result<(), ForthError> {
212 // Remove anything extraneous from the end of the opcode array (*processor memory*),
213 // typically previous immediate mode tokens
214 self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
215
216 // Get the compiled assembler from the token vector
217 let mut compiled = self.compile_token_vector(tokens)?;
218 // Put the return OpCode onto the end
219 compiled.push(Opcode::RET);
220 // The current function start is the end of the last function
221 let function_start = self.last_function;
222 // Move last function pointer
223 self.last_function += compiled.len();
224 // Remember the opcodes for reflection purposes if its enabled
225 #[cfg(feature = "enable_reflection")]
226 self.word_opcodes
227 .insert(word_name.to_owned(), compiled.clone());
228 // Add the function to the opcode memory
229 self.sm.st.opcodes.append(&mut compiled);
230 // Remember where to find it...
231 self.word_addresses
232 .insert(word_name.to_owned(), function_start);
233 // println!("Token Memory {:?}", self.sm.st.opcodes);
234 // println!("Word Addresses {:?}", self.word_addresses);
235 // println!("Last function {}", self.last_function);
236 #[cfg(feature = "enable_reflection")]
237 self.word_definitions
238 .insert(word_name.to_owned(), format!("{:?}", tokens));
239 Ok(())
240 }
241
242 fn compile_token_vector(
243 &mut self,
244 token_vector: &[ForthToken],
245 ) -> Result<Vec<Opcode>, ForthError> {
246 // Stack of if statements, they are deferred until the THEN Forth word
247 let mut deferred_statements = Vec::new();
248 // List of compiled processor opcodes that we are building up
249 let mut tv: Vec<Opcode> = Vec::new();
250
251 // Go through all the Forth tokens and turn them into processor Opcodes (for our StackMachine emulated processor)
252 for t in token_vector.iter() {
253 match t {
254 ForthToken::DropLineComment(_) => (),
255 ForthToken::ParenthesizedRemark(_) => (),
256 ForthToken::StringCommand(_, _) => (),
257 ForthToken::Number(n) => {
258 // Numbers get pushed as a LDI opcode
259 tv.push(Opcode::LDI(*n));
260 }
261 ForthToken::Command(s) => {
262 // Remember where we are in the list of opcodes in case we hit a IF statement, LOOP etc...
263 let current_instruction = tv.len();
264
265 match s.as_ref() {
266 "DO" => {
267 let start_of_loop_code = current_instruction;
268 // This eats the loop parameters from the number stack...
269 tv.push(Opcode::PUSHLP);
270 let logical_start_of_loop = tv.len();
271 deferred_statements.push(DeferredStatement::DoLoop(
272 DeferredDoLoopStatement::new(
273 start_of_loop_code,
274 logical_start_of_loop,
275 ),
276 LoopExits::new(),
277 ));
278 }
279 "LOOP" => {
280 if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
281 deferred_statements.pop()
282 {
283 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
284 - i64::try_from(current_instruction).unwrap()
285 // Have to jump back over the JR and the LDI
286 - 3;
287 tv.push(Opcode::INCLP);
288 tv.push(Opcode::CMPLOOP);
289 tv.push(Opcode::LDI(jump_back));
290 tv.push(Opcode::JRZ);
291
292 loop_exits.fixup_loop_exits(&mut tv);
293 } else {
294 return Err(ForthError::InvalidSyntax(
295 "LOOP without proper loop start like DO".to_owned(),
296 ));
297 }
298 tv.push(Opcode::DROPLP);
299 }
300 "+LOOP" => {
301 if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
302 deferred_statements.pop()
303 {
304 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
305 - i64::try_from(current_instruction).unwrap()
306 // Have to jump back over the JR and the LDI
307 - 3;
308 tv.push(Opcode::ADDLP);
309 tv.push(Opcode::CMPLOOP);
310 tv.push(Opcode::LDI(jump_back));
311 tv.push(Opcode::JRZ);
312
313 loop_exits.fixup_loop_exits(&mut tv);
314 } else {
315 return Err(ForthError::InvalidSyntax(
316 "+LOOP without proper loop start like DO".to_owned(),
317 ));
318 }
319 tv.push(Opcode::DROPLP);
320 }
321 "LEAVE" => {
322 let most_recent_loop_statement =
323 deferred_statements.iter_mut().rev().find(|x| match **x {
324 DeferredStatement::If(_) => false,
325 DeferredStatement::DoLoop(_, _) => true,
326 DeferredStatement::BeginLoop(_, _) => true,
327 });
328 if let Some(deferred_statement) = most_recent_loop_statement {
329 let loop_exits =
330 match deferred_statement {
331 DeferredStatement::DoLoop(_, loop_exits) => loop_exits,
332 DeferredStatement::BeginLoop(_, loop_exits) => loop_exits,
333 _ => return Err(ForthError::InvalidSyntax(
334 "LEAVE without proper loop start like DO or BEGIN(1)"
335 .to_owned(),
336 )),
337 };
338 // Record the exit point
339 loop_exits.add_exit_point(current_instruction);
340
341 // We fix up the jumps once we get the end of loop
342 tv.push(Opcode::LDI(0));
343 tv.push(Opcode::JR);
344 } else {
345 return Err(ForthError::InvalidSyntax(
346 "LEAVE without proper loop start like DO or BEGIN(2)"
347 .to_owned(),
348 ));
349 }
350 }
351 "BEGIN" => {
352 deferred_statements.push(DeferredStatement::BeginLoop(
353 DeferredBeginLoopStatement::new(current_instruction),
354 LoopExits::new(),
355 ));
356 }
357 "UNTIL" => {
358 if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
359 deferred_statements.pop()
360 {
361 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
362 - i64::try_from(current_instruction).unwrap()
363 // Have to jump back over the JR and the LDI
364 - 1;
365 tv.push(Opcode::LDI(jump_back));
366 tv.push(Opcode::JRZ);
367
368 loop_exits.fixup_loop_exits(&mut tv);
369 } else {
370 return Err(ForthError::InvalidSyntax(
371 "UNTIL without proper loop start like BEGIN".to_owned(),
372 ));
373 }
374 }
375 "WHILE" => {
376 if let Some(DeferredStatement::BeginLoop(_loop_def, loop_exits)) =
377 deferred_statements.last_mut()
378 {
379 loop_exits.add_exit_point(current_instruction);
380 // We fix up the jumps once we get the end of loop
381 tv.push(Opcode::LDI(0));
382 tv.push(Opcode::JRZ);
383 } else {
384 return Err(ForthError::InvalidSyntax(
385 "WHILE without proper loop start like BEGIN".to_owned(),
386 ));
387 }
388 }
389 "REPEAT" => {
390 if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
391 deferred_statements.pop()
392 {
393 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
394 - i64::try_from(current_instruction).unwrap()
395 // Have to jump back over the JR and the LDI
396 - 1;
397 tv.push(Opcode::LDI(jump_back));
398 tv.push(Opcode::JR);
399
400 loop_exits.fixup_loop_exits(&mut tv);
401 } else {
402 return Err(ForthError::InvalidSyntax(
403 "AGAIN without proper loop start like BEGIN".to_owned(),
404 ));
405 }
406 }
407 "AGAIN" => {
408 if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
409 deferred_statements.pop()
410 {
411 let jump_back = i64::try_from(loop_def.logical_start).unwrap()
412 - i64::try_from(current_instruction).unwrap()
413 // Have to jump back over the JR and the LDI
414 - 1;
415 tv.push(Opcode::LDI(jump_back));
416 tv.push(Opcode::JR);
417
418 loop_exits.fixup_loop_exits(&mut tv);
419 } else {
420 return Err(ForthError::InvalidSyntax(
421 "AGAIN without proper loop start like BEGIN".to_owned(),
422 ));
423 }
424 }
425 // FLAG 0 = Skip stuff inside IF, !0 = Run stuff inside IF
426 "IF" => {
427 deferred_statements.push(DeferredStatement::If(
428 DeferredIfStatement::new(current_instruction),
429 ));
430 //println!("(IF)Deferred If Stack {:?}", deferred_if_statements);
431 tv.push(Opcode::LDI(0));
432 tv.push(Opcode::JRZ);
433 }
434 "ELSE" => {
435 if let Some(DeferredStatement::If(x)) = deferred_statements.last_mut() {
436 x.else_location = Some(current_instruction);
437 //println!("(ELSE) Deferred If Stack {:?}", deferred_if_statements);
438 tv.push(Opcode::LDI(0));
439 tv.push(Opcode::JR);
440 } else {
441 return Err(ForthError::InvalidSyntax(
442 "ELSE without IF".to_owned(),
443 ));
444 }
445 }
446 "THEN" => {
447 // This only works if there isn't an ELSE statement, it needs to jump differently if there is an ELSE statement
448 //println!("(THEN) Deferred If Stack {:?}", deferred_if_statements);
449 if let Some(DeferredStatement::If(x)) = deferred_statements.pop() {
450 //println!("(if let Some(x)) Deferred If Stack {:?}", x);
451 let if_jump_location = x.if_location;
452 let if_jump_offset = match x.else_location {
453 None => (current_instruction as u64
454 - (x.if_location + 1) as u64)
455 .try_into()
456 .unwrap(),
457 Some(el) => (current_instruction as u64 - el as u64 + 1)
458 .try_into()
459 .unwrap(),
460 };
461 let (else_jump_location, else_jump_offset): (
462 Option<usize>,
463 Option<i64>,
464 ) = match x.else_location {
465 Some(x) => (
466 Some(x),
467 Some(
468 i64::try_from(
469 current_instruction as u64 - (x + 1) as u64,
470 )
471 .unwrap(),
472 ),
473 ),
474 None => (None, None),
475 };
476 //println!("if structure: {:?}", x);
477 tv[if_jump_location] = Opcode::LDI(if_jump_offset);
478 if let (Some(location), Some(offset)) =
479 (else_jump_location, else_jump_offset)
480 {
481 tv[location] = Opcode::LDI(offset);
482 }
483 } else {
484 return Err(ForthError::InvalidSyntax(
485 "THEN without IF".to_owned(),
486 ));
487 }
488 }
489 _ => {
490 if let Some(&offset) = self.word_addresses.get(*s) {
491 tv.push(Opcode::LDI(offset as i64));
492 tv.push(Opcode::CALL);
493 } else if let Some(ol) = self.intrinsic_words.get(*s) {
494 tv.extend_from_slice(ol);
495 } else {
496 return Err(ForthError::UnknownToken((*s).to_string()));
497 }
498 }
499 }
500 }
501 ForthToken::Colon => {
502 unreachable!("Colon should never reach this function");
503 }
504 ForthToken::SemiColon => {
505 unreachable!("SemiColon should never reach this function");
506 }
507 }
508 }
509
510 Ok(tv)
511 }
512
513 fn execute_tokens(
514 &mut self,
515 token_source: &ForthTokenizer,
516 gas_limit: GasLimit,
517 ) -> Result<(), ForthError> {
518 let mut ol = self.compile_tokens_compile_and_remove_word_definitions(token_source)?;
519 //println!("Compiled Opcodes: {:?}", ol);
520 self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
521 self.sm.st.opcodes.append(&mut ol);
522 self.sm.execute(self.last_function, gas_limit)?;
523 //println!("Total opcodes defined: {}", self.sm.st.opcodes.len());
524 //println!("Total opcodes executed: {}", self.sm.st.gas_used());
525
526 Ok(())
527 }
528
529 pub fn execute_string(&mut self, s: &str, gas_limit: GasLimit) -> Result<(), ForthError> {
530 let tokenizer = ForthTokenizer::new(s);
531 self.execute_tokens(&tokenizer, gas_limit)
532 }
533}