rust_forth_compiler/
lib.rs

1use rust_forth_tokenizer::ForthToken;
2use rust_forth_tokenizer::ForthTokenizer;
3pub use rust_simple_stack_processor::GasLimit;
4use rust_simple_stack_processor::Opcode;
5use rust_simple_stack_processor::StackMachine;
6
7mod error;
8
9pub use error::ForthError;
10use std::collections::HashMap;
11use std::convert::TryFrom;
12use std::convert::TryInto;
13
14#[cfg(test)]
15mod tests;
16
/// Builds a `std::collections::HashMap` from a list of `key => value` pairs.
///
/// The pairs are inserted in the order they are written, so a key that appears twice keeps
/// the value of its last occurrence. A trailing comma after the final pair is accepted, and
/// an empty invocation yields an empty map.
macro_rules! hashmap {
    ($( $key:expr => $val:expr ),* $(,)?) => {{
        // `mut` is unused only for the empty invocation, which performs no inserts.
        #[allow(unused_mut)]
        let mut map = ::std::collections::HashMap::new();
        $( map.insert($key, $val); )*
        map
    }};
}
25
26pub struct ForthCompiler {
27    // This is the Stack Machine processor that runs the compiled Forth instructions
28    pub sm: StackMachine,
29    // These are the words that we know how to work with regardless, things like DROP, MUL, etc
30    intrinsic_words: HashMap<&'static str, Vec<Opcode>>,
31    // This is where we remember where we put compiled words in the *memory* of the StackMachine
32    // We run the interactive opcodes after these compiled words, and then erase the memory after
33    // the compiled words again for the next batch of interactive opcodes.
34    #[cfg(feature = "enable_reflection")]
35    pub word_addresses: HashMap<String, usize>,
36    #[cfg(not(feature = "enable_reflection"))]
37    word_addresses: HashMap<String, usize>,
38    // This is the location in memory that points to the location after the last compiled opcode
39    // So its an ideal place to run interactive compiled opcodes
40    last_function: usize,
41    // Remember the definition for words
42    #[cfg(feature = "enable_reflection")]
43    pub word_definitions: HashMap<String, String>,
44    // Remember the opcodes for words
45    #[cfg(feature = "enable_reflection")]
46    pub word_opcodes: HashMap<String, Vec<Opcode>>,
47}
48
49impl Default for ForthCompiler {
50    fn default() -> ForthCompiler {
51        ForthCompiler {
52            sm: StackMachine::default(),
53            intrinsic_words: hashmap![
54            "SWAP" => vec![Opcode::SWAP],
55            "NOT" => vec![Opcode::NOT],
56            "ADD" => vec![Opcode::ADD],
57            "SUB" => vec![Opcode::SUB],
58            "MUL" => vec![Opcode::MUL],
59            "DIV" => vec![Opcode::DIV],
60            "DUP" => vec![Opcode::DUP],
61            "2DUP" => vec![Opcode::DUP2],
62            "TRAP" => vec![Opcode::TRAP],
63            "DROP" => vec![Opcode::DROP],
64            "2DROP" => vec![Opcode::DROP,Opcode::DROP],
65            "2OVER" => vec![Opcode::OVER2],
66            "2SWAP" => vec![Opcode::SWAP2],
67            "1+" => vec![Opcode::LDI(1),Opcode::ADD],
68            "1-" => vec![Opcode::LDI(-1),Opcode::ADD],
69            "2+" => vec![Opcode::LDI(2),Opcode::ADD],
70            "2-" => vec![Opcode::LDI(-2),Opcode::ADD],
71            "2*" => vec![Opcode::LDI(2),Opcode::MUL],
72            "2/" => vec![Opcode::LDI(2),Opcode::DIV],
73            "I" => vec![Opcode::GETLP],
74            "J" => vec![Opcode::GETLP2],
75            "AND" => vec![Opcode::AND],
76            "=" => vec![Opcode::SUB,Opcode::CMPZ],
77            "<>" => vec![Opcode::SUB,Opcode::CMPNZ]
78            ],
79            word_addresses: HashMap::new(),
80            last_function: 0,
81            #[cfg(feature = "enable_reflection")]
82            word_definitions: HashMap::new(),
83            #[cfg(feature = "enable_reflection")]
84            word_opcodes: HashMap::new(),
85        }
86    }
87}
88
/// Bookkeeping for a Forth `IF` whose jumps cannot be filled in until its `THEN` is reached.
#[derive(Debug)]
struct DeferredIfStatement {
    /// Opcode index recorded when the `IF` was compiled.
    if_location: usize,
    /// Opcode index recorded when the matching `ELSE` was compiled, if there was one.
    else_location: Option<usize>,
}

impl DeferredIfStatement {
    /// Starts tracking an `IF` at `if_location`; no `ELSE` has been seen yet.
    pub fn new(if_location: usize) -> Self {
        Self {
            if_location,
            else_location: None,
        }
    }
}
104
/// Bookkeeping for a Forth `DO` loop that is waiting for its closing `LOOP` / `+LOOP`.
#[derive(Debug)]
struct DeferredDoLoopStatement {
    /// Opcode index where the loop set-up begins; only used when debugging the compiler.
    _prelude_start: usize,
    /// Opcode index the loop jumps back to on every iteration.
    logical_start: usize,
}

impl DeferredDoLoopStatement {
    /// Records both start positions of a `DO` loop.
    pub fn new(_prelude_start: usize, logical_start: usize) -> Self {
        Self {
            _prelude_start,
            logical_start,
        }
    }
}
120
121#[derive(Debug)]
122struct LoopExits {
123    loop_exit_locations: Vec<usize>,
124}
125
126impl LoopExits {
127    pub fn new() -> LoopExits {
128        LoopExits {
129            loop_exit_locations: Vec::new(),
130        }
131    }
132
133    pub fn add_exit_point(&mut self, loop_exit_location: usize) {
134        self.loop_exit_locations.push(loop_exit_location);
135    }
136
137    fn fixup_loop_exits(&self, opcode_vector: &mut Vec<Opcode>) {
138        let loop_exit_point = opcode_vector.len();
139        for leave_point in self.loop_exit_locations.iter() {
140            let jump_forward =
141                i64::try_from(loop_exit_point).unwrap() - i64::try_from(*leave_point).unwrap() - 1;
142            opcode_vector[*leave_point] = Opcode::LDI(jump_forward);
143        }
144    }
145}
146
/// Bookkeeping for a Forth `BEGIN` loop that is waiting for its closing word.
#[derive(Debug)]
struct DeferredBeginLoopStatement {
    /// Opcode index the loop jumps back to.
    logical_start: usize,
}

impl DeferredBeginLoopStatement {
    /// Records where a `BEGIN` loop starts.
    pub fn new(logical_start: usize) -> Self {
        Self { logical_start }
    }
}
157
158enum DeferredStatement {
159    If(DeferredIfStatement),
160    DoLoop(DeferredDoLoopStatement, LoopExits),
161    BeginLoop(DeferredBeginLoopStatement, LoopExits),
162}
163
164impl ForthCompiler {
165    fn compile_tokens_compile_and_remove_word_definitions(
166        &mut self,
167        token_source: &ForthTokenizer,
168    ) -> Result<Vec<Opcode>, ForthError> {
169        // This is the interactive compiled token list
170        let mut tvi = Vec::new();
171
172        // Because we consume tokens in an inner loop, we can't use the normal for loop to read the tokens
173        let mut iter = token_source.into_iter();
174        while let Some(token) = iter.next() {
175            match token {
176                // If a colon token, then compile the word definition
177                ForthToken::Colon => {
178                    // Get the next token which has to be a command token, or its an error, this token will be the name to compile to
179                    if let Some(ForthToken::Command(word_name)) = iter.next() {
180                        // This is the list of tokens we will be compiling
181                        let mut tvc = Vec::new();
182                        let mut found_semicolon = false;
183                        // Because this is an inner loop using the outer iterator, we can't use the normal for loop syntax
184                        while let Some(token) = iter.next() {
185                            match token {
186                                ForthToken::SemiColon => {
187                                    // We have found the end of the word definition, so compile to opcodes and put into memory...
188                                    self.compile_tokens_as_word(word_name, &tvc)?;
189                                    found_semicolon = true;
190                                    break;
191                                }
192                                _ => tvc.push(token),
193                            }
194                        }
195                        if !found_semicolon {
196                            return Err(ForthError::MissingSemicolonAfterColon);
197                        }
198                    } else {
199                        // The command token has to be right after the colon token, we don't permit things like comments, we could though...
200                        return Err(ForthError::MissingCommandAfterColon);
201                    }
202                }
203                ForthToken::SemiColon => {
204                    return Err(ForthError::SemicolonBeforeColon);
205                }
206                _ => {
207                    tvi.push(token);
208                }
209            }
210        }
211
212        let mut compiled_tokens = self.compile_token_vector(&tvi)?;
213
214        // We need to return after running the interactive opcodes, so put the return in now
215        compiled_tokens.push(Opcode::RET);
216
217        Ok(compiled_tokens)
218    }
219
220    fn compile_tokens_as_word(
221        &mut self,
222        word_name: &str,
223        tokens: &[ForthToken],
224    ) -> Result<(), ForthError> {
225        // Remove anything extraneous from the end of the opcode array (*processor memory*),
226        // typically previous immediate mode tokens
227        self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
228
229        // Get the compiled assembler from the token vector
230        let mut compiled = self.compile_token_vector(tokens)?;
231        // Put the return OpCode onto the end
232        compiled.push(Opcode::RET);
233        // The current function start is the end of the last function
234        let function_start = self.last_function;
235        // Move last function pointer
236        self.last_function += compiled.len();
237        // Remember the opcodes for reflection purposes if its enabled
238        #[cfg(feature = "enable_reflection")]
239        self.word_opcodes
240            .insert(word_name.to_owned(), compiled.clone());
241        // Add the function to the opcode memory
242        self.sm.st.opcodes.append(&mut compiled);
243        // Remember where to find it...
244        self.word_addresses
245            .insert(word_name.to_owned(), function_start);
246        //        println!("Token Memory {:?}", self.sm.st.opcodes);
247        //        println!("Word Addresses {:?}", self.word_addresses);
248        //        println!("Last function {}", self.last_function);
249        #[cfg(feature = "enable_reflection")]
250        self.word_definitions
251            .insert(word_name.to_owned(), format!("{:?}", tokens));
252        Ok(())
253    }
254
255    fn compile_token_vector(
256        &mut self,
257        token_vector: &[ForthToken],
258    ) -> Result<Vec<Opcode>, ForthError> {
259        // Stack of if statements, they are deferred until the THEN Forth word
260        let mut deferred_statements = Vec::new();
261        // List of compiled processor opcodes that we are building up
262        let mut tv: Vec<Opcode> = Vec::new();
263
264        // Go through all the Forth tokens and turn them into processor Opcodes (for our StackMachine emulated processor)
265        for t in token_vector.iter() {
266            match t {
267                ForthToken::DropLineComment(_) => (),
268                ForthToken::ParenthesizedRemark(_) => (),
269                ForthToken::StringCommand(_, _) => (),
270                ForthToken::Number(n) => {
271                    // Numbers get pushed as a LDI opcode
272                    tv.push(Opcode::LDI(*n));
273                }
274                ForthToken::Command(s) => {
275                    // Remember where we are in the list of opcodes in case we hit a IF statement, LOOP etc...
276                    let current_instruction = tv.len();
277
278                    match s.as_ref() {
279                        "DO" => {
280                            let start_of_loop_code = current_instruction;
281                            // This eats the loop parameters from the number stack...
282                            tv.push(Opcode::PUSHLP);
283                            let logical_start_of_loop = tv.len();
284                            deferred_statements.push(DeferredStatement::DoLoop(
285                                DeferredDoLoopStatement::new(
286                                    start_of_loop_code,
287                                    logical_start_of_loop,
288                                ),
289                                LoopExits::new(),
290                            ));
291                        }
292                        "LOOP" => {
293                            if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
294                                deferred_statements.pop()
295                            {
296                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
297                                    - i64::try_from(current_instruction).unwrap()
298                                    // Have to jump back over the JR and the LDI
299                                    - 3;
300                                tv.push(Opcode::INCLP);
301                                tv.push(Opcode::CMPLOOP);
302                                tv.push(Opcode::LDI(jump_back));
303                                tv.push(Opcode::JRZ);
304
305                                loop_exits.fixup_loop_exits(&mut tv);
306                            } else {
307                                return Err(ForthError::InvalidSyntax(
308                                    "LOOP without proper loop start like DO".to_owned(),
309                                ));
310                            }
311                            tv.push(Opcode::DROPLP);
312                        }
313                        "+LOOP" => {
314                            if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
315                                deferred_statements.pop()
316                            {
317                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
318                                    - i64::try_from(current_instruction).unwrap()
319                                    // Have to jump back over the JR and the LDI
320                                    - 3;
321                                tv.push(Opcode::ADDLP);
322                                tv.push(Opcode::CMPLOOP);
323                                tv.push(Opcode::LDI(jump_back));
324                                tv.push(Opcode::JRZ);
325
326                                loop_exits.fixup_loop_exits(&mut tv);
327                            } else {
328                                return Err(ForthError::InvalidSyntax(
329                                    "+LOOP without proper loop start like DO".to_owned(),
330                                ));
331                            }
332                            tv.push(Opcode::DROPLP);
333                        }
334                        "LEAVE" => {
335                            let most_recent_loop_statement =
336                                deferred_statements.iter_mut().rev().find(|x| match **x {
337                                    DeferredStatement::If(_) => false,
338                                    DeferredStatement::DoLoop(_, _) => true,
339                                    DeferredStatement::BeginLoop(_, _) => true,
340                                });
341                            if let Some(deferred_statement) = most_recent_loop_statement {
342                                let loop_exits =
343                                    match deferred_statement {
344                                        DeferredStatement::DoLoop(_, loop_exits) => loop_exits,
345                                        DeferredStatement::BeginLoop(_, loop_exits) => loop_exits,
346                                        _ => return Err(ForthError::InvalidSyntax(
347                                            "LEAVE without proper loop start like DO or BEGIN(1)"
348                                                .to_owned(),
349                                        )),
350                                    };
351                                // Record the exit point
352                                loop_exits.add_exit_point(current_instruction);
353
354                                // We fix up the jumps once we get the end of loop
355                                tv.push(Opcode::LDI(0));
356                                tv.push(Opcode::JR);
357                            } else {
358                                return Err(ForthError::InvalidSyntax(
359                                    "LEAVE without proper loop start like DO or BEGIN(2)"
360                                        .to_owned(),
361                                ));
362                            }
363                        }
364                        "BEGIN" => {
365                            deferred_statements.push(DeferredStatement::BeginLoop(
366                                DeferredBeginLoopStatement::new(current_instruction),
367                                LoopExits::new(),
368                            ));
369                        }
370                        "UNTIL" => {
371                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
372                                deferred_statements.pop()
373                            {
374                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
375                                    - i64::try_from(current_instruction).unwrap()
376                                    // Have to jump back over the JR and the LDI
377                                    - 1;
378                                tv.push(Opcode::LDI(jump_back));
379                                tv.push(Opcode::JRZ);
380
381                                loop_exits.fixup_loop_exits(&mut tv);
382                            } else {
383                                return Err(ForthError::InvalidSyntax(
384                                    "UNTIL without proper loop start like BEGIN".to_owned(),
385                                ));
386                            }
387                        }
388                        "WHILE" => {
389                            if let Some(DeferredStatement::BeginLoop(_loop_def, loop_exits)) =
390                                deferred_statements.last_mut()
391                            {
392                                loop_exits.add_exit_point(current_instruction);
393                                // We fix up the jumps once we get the end of loop
394                                tv.push(Opcode::LDI(0));
395                                tv.push(Opcode::JRZ);
396                            } else {
397                                return Err(ForthError::InvalidSyntax(
398                                    "WHILE without proper loop start like BEGIN".to_owned(),
399                                ));
400                            }
401                        }
402                        "REPEAT" => {
403                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
404                                deferred_statements.pop()
405                            {
406                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
407                                    - i64::try_from(current_instruction).unwrap()
408                                    // Have to jump back over the JR and the LDI
409                                    - 1;
410                                tv.push(Opcode::LDI(jump_back));
411                                tv.push(Opcode::JR);
412
413                                loop_exits.fixup_loop_exits(&mut tv);
414                            } else {
415                                return Err(ForthError::InvalidSyntax(
416                                    "AGAIN without proper loop start like BEGIN".to_owned(),
417                                ));
418                            }
419                        }
420                        "AGAIN" => {
421                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
422                                deferred_statements.pop()
423                            {
424                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
425                                    - i64::try_from(current_instruction).unwrap()
426                                    // Have to jump back over the JR and the LDI
427                                    - 1;
428                                tv.push(Opcode::LDI(jump_back));
429                                tv.push(Opcode::JR);
430
431                                loop_exits.fixup_loop_exits(&mut tv);
432                            } else {
433                                return Err(ForthError::InvalidSyntax(
434                                    "AGAIN without proper loop start like BEGIN".to_owned(),
435                                ));
436                            }
437                        }
438                        // FLAG 0 = Skip stuff inside IF, !0 = Run stuff inside IF
439                        "IF" => {
440                            deferred_statements.push(DeferredStatement::If(
441                                DeferredIfStatement::new(current_instruction),
442                            ));
443                            //println!("(IF)Deferred If Stack {:?}", deferred_if_statements);
444                            tv.push(Opcode::LDI(0));
445                            tv.push(Opcode::JRZ);
446                        }
447                        "ELSE" => {
448                            if let Some(DeferredStatement::If(x)) = deferred_statements.last_mut() {
449                                x.else_location = Some(current_instruction);
450                                //println!("(ELSE) Deferred If Stack {:?}", deferred_if_statements);
451                                tv.push(Opcode::LDI(0));
452                                tv.push(Opcode::JR);
453                            } else {
454                                return Err(ForthError::InvalidSyntax(
455                                    "ELSE without IF".to_owned(),
456                                ));
457                            }
458                        }
459                        "THEN" => {
460                            // This only works if there isn't an ELSE statement, it needs to jump differently if there is an ELSE statement
461                            //println!("(THEN) Deferred If Stack {:?}", deferred_if_statements);
462                            if let Some(DeferredStatement::If(x)) = deferred_statements.pop() {
463                                //println!("(if let Some(x)) Deferred If Stack {:?}", x);
464                                let if_jump_location = x.if_location;
465                                let if_jump_offset = match x.else_location {
466                                    None => (current_instruction as u64
467                                        - (x.if_location + 1) as u64)
468                                        .try_into()
469                                        .unwrap(),
470                                    Some(el) => (current_instruction as u64 - el as u64 + 1)
471                                        .try_into()
472                                        .unwrap(),
473                                };
474                                let (else_jump_location, else_jump_offset): (
475                                    Option<usize>,
476                                    Option<i64>,
477                                ) = match x.else_location {
478                                    Some(x) => (
479                                        Some(x),
480                                        Some(
481                                            i64::try_from(
482                                                current_instruction as u64 - (x + 1) as u64,
483                                            )
484                                            .unwrap(),
485                                        ),
486                                    ),
487                                    None => (None, None),
488                                };
489                                //println!("if structure: {:?}", x);
490                                tv[if_jump_location] = Opcode::LDI(if_jump_offset);
491                                if let (Some(location), Some(offset)) =
492                                    (else_jump_location, else_jump_offset)
493                                {
494                                    tv[location] = Opcode::LDI(offset);
495                                }
496                            } else {
497                                return Err(ForthError::InvalidSyntax(
498                                    "THEN without IF".to_owned(),
499                                ));
500                            }
501                        }
502                        _ => {
503                            if let Some(offset) = self.word_addresses.get(*s) {
504                                tv.push(Opcode::LDI(*offset as i64));
505                                tv.push(Opcode::CALL);
506                            } else if let Some(ol) = self.intrinsic_words.get::<str>(s) {
507                                tv.append(&mut ol.clone());
508                            } else {
509                                return Err(ForthError::UnknownToken(s.to_string()));
510                            }
511                        }
512                    }
513                }
514                ForthToken::Colon => {
515                    panic!("Colon should never reach this function");
516                }
517                ForthToken::SemiColon => {
518                    panic!("SemiColon should never reach this function");
519                }
520            }
521        }
522
523        Ok(tv)
524    }
525
526    fn execute_tokens(
527        &mut self,
528        token_source: &ForthTokenizer,
529        gas_limit: GasLimit,
530    ) -> Result<(), ForthError> {
531        let mut ol = self.compile_tokens_compile_and_remove_word_definitions(token_source)?;
532        //println!("Compiled Opcodes: {:?}", ol);
533        self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
534        self.sm.st.opcodes.append(&mut ol);
535        self.sm.execute(self.last_function, gas_limit)?;
536        //println!("Total opcodes defined: {}", self.sm.st.opcodes.len());
537        //println!("Total opcodes executed: {}", self.sm.st.gas_used());
538
539        Ok(())
540    }
541
542    pub fn execute_string(&mut self, s: &str, gas_limit: GasLimit) -> Result<(), ForthError> {
543        let tokenizer = ForthTokenizer::new(&s);
544        self.execute_tokens(&tokenizer, gas_limit)?;
545        Ok(())
546    }
547}