rust_forth_compiler/
lib.rs

1use rust_forth_tokenizer::ForthToken;
2use rust_forth_tokenizer::ForthTokenizer;
3pub use rust_simple_stack_processor::GasLimit;
4use rust_simple_stack_processor::Opcode;
5use rust_simple_stack_processor::StackMachine;
6
7mod error;
8
9pub use error::ForthError;
10use std::collections::HashMap;
11use std::convert::TryFrom;
12use std::convert::TryInto;
13
14#[cfg(test)]
15mod tests;
16
// Builds a `std::collections::HashMap` from a literal list of `key => value`
// pairs, e.g. `hashmap!["a" => 1, "b" => 2]`.
// A trailing comma after the last pair is accepted, and an empty invocation
// yields an empty map.
macro_rules! hashmap {
    ($( $key:expr => $val:expr ),* $(,)?) => {{
        // `mut` is only needed when at least one pair is supplied
        #[allow(unused_mut)]
        let mut map = ::std::collections::HashMap::new();
        $( map.insert($key, $val); )*
        map
    }};
}
25
26pub struct ForthCompiler {
27    // This is the Stack Machine processor that runs the compiled Forth instructions
28    pub sm: StackMachine,
29    // These are the words that we know how to work with regardless, things like DROP, MUL, etc
30    intrinsic_words: HashMap<&'static str, Vec<Opcode>>,
31    // This is where we remember where we put compiled words in the *memory* of the StackMachine
32    // We run the interactive opcodes after these compiled words, and then erase the memory after
33    // the compiled words again for the next batch of interactive opcodes.
34    #[cfg(feature="enable_reflection")]
35    pub word_addresses: HashMap<String, usize>,
36    #[cfg(not(feature="enable_reflection"))]
37    word_addresses: HashMap<String, usize>,
38    // This is the location in memory that points to the location after the last compiled opcode
39    // So its an ideal place to run interactive compiled opcodes
40    last_function: usize,
41    // Remember the definition for words
42    #[cfg(feature="enable_reflection")]
43    pub word_definitions: HashMap<String, String>,
44    // Remember the opcodes for words
45    #[cfg(feature="enable_reflection")]
46    pub word_opcodes: HashMap<String, Vec<Opcode>>,
47}
48
49impl Default for ForthCompiler {
50    fn default() -> ForthCompiler {
51        ForthCompiler {
52            sm: StackMachine::default(),
53            intrinsic_words: hashmap![
54            "SWAP" => vec![Opcode::SWAP],
55            "NOT" => vec![Opcode::NOT],
56            "ADD" => vec![Opcode::ADD],
57            "SUB" => vec![Opcode::SUB],
58            "MUL" => vec![Opcode::MUL],
59            "DIV" => vec![Opcode::DIV],
60            "DUP" => vec![Opcode::DUP],
61            "2DUP" => vec![Opcode::DUP2],
62            "TRAP" => vec![Opcode::TRAP],
63            "DROP" => vec![Opcode::DROP],
64            "2DROP" => vec![Opcode::DROP,Opcode::DROP],
65            "2OVER" => vec![Opcode::OVER2],
66            "2SWAP" => vec![Opcode::SWAP2],
67            "1+" => vec![Opcode::LDI(1),Opcode::ADD],
68            "1-" => vec![Opcode::LDI(-1),Opcode::ADD],
69            "2+" => vec![Opcode::LDI(2),Opcode::ADD],
70            "2-" => vec![Opcode::LDI(-2),Opcode::ADD],
71            "2*" => vec![Opcode::LDI(2),Opcode::MUL],
72            "2/" => vec![Opcode::LDI(2),Opcode::DIV],
73            "I" => vec![Opcode::GETLP],
74            "J" => vec![Opcode::GETLP2],
75            "AND" => vec![Opcode::AND],
76            "=" => vec![Opcode::SUB,Opcode::CMPZ],
77            "<>" => vec![Opcode::SUB,Opcode::CMPNZ]
78            ],
79            word_addresses: HashMap::new(),
80            last_function: 0,
81            #[cfg(feature="enable_reflection")]
82            word_definitions: HashMap::new(),
83            #[cfg(feature="enable_reflection")]
84            word_opcodes: HashMap::new(),
85        }
86    }
87}
88
// Bookkeeping for a Forth IF statement whose jump targets are not yet known.
#[derive(Debug)]
struct DeferredIfStatement {
    // Opcode index recorded when the IF was encountered
    if_location: usize,
    // Opcode index recorded when a matching ELSE was encountered, if any
    else_location: Option<usize>,
}

impl DeferredIfStatement {
    // Starts tracking an IF at `if_location`; no ELSE has been seen yet.
    pub fn new(if_location: usize) -> Self {
        let else_location = None;
        Self {
            if_location,
            else_location,
        }
    }
}
104
// Bookkeeping for a Forth DO loop that has not been closed yet.
#[derive(Debug)]
struct DeferredDoLoopStatement {
    // Opcode index at which the loop's set-up code begins
    prelude_start: usize,
    // Opcode index of the first instruction of the loop body (the jump-back target)
    logical_start: usize,
}

impl DeferredDoLoopStatement {
    // Records where the loop prelude and the loop body start.
    pub fn new(prelude_start: usize, logical_start: usize) -> Self {
        Self {
            logical_start,
            prelude_start,
        }
    }
}
120
121#[derive(Debug)]
122struct LoopExits {
123    loop_exit_locations: Vec<usize>,
124}
125
126impl LoopExits {
127    pub fn new() -> LoopExits {
128        LoopExits {
129            loop_exit_locations: Vec::new(),
130        }
131    }
132
133    pub fn add_exit_point(&mut self, loop_exit_location: usize) {
134        self.loop_exit_locations.push(loop_exit_location);
135    }
136
137    fn fixup_loop_exits(&self, opcode_vector: &mut Vec<Opcode>) {
138        let loop_exit_point = opcode_vector.len();
139        for leave_point in self.loop_exit_locations.iter() {
140            let jump_forward =
141                i64::try_from(loop_exit_point).unwrap() - i64::try_from(*leave_point).unwrap() - 1;
142            opcode_vector[*leave_point] = Opcode::LDI(jump_forward);
143        }
144    }
145}
146
// Bookkeeping for a Forth BEGIN loop that has not been closed yet.
#[derive(Debug)]
struct DeferredBeginLoopStatement {
    // Opcode index of the first instruction of the loop body (the jump-back target)
    logical_start: usize,
}

impl DeferredBeginLoopStatement {
    // Records where the loop body starts.
    pub fn new(logical_start: usize) -> Self {
        Self { logical_start }
    }
}
157
158enum DeferredStatement {
159    If(DeferredIfStatement),
160    DoLoop(DeferredDoLoopStatement, LoopExits),
161    BeginLoop(DeferredBeginLoopStatement, LoopExits),
162}
163
164impl ForthCompiler {
165    fn compile_tokens_compile_and_remove_word_definitions(
166        &mut self,
167        token_source: &ForthTokenizer,
168    ) -> Result<Vec<Opcode>, ForthError> {
169        // This is the interactive compiled token list
170        let mut tvi = Vec::new();
171
172        // Because we consume tokens in an inner loop, we can't use the normal for loop to read the tokens
173        let mut iter = token_source.into_iter();
174        while let Some(token) = iter.next() {
175            match token {
176                // If a colon token, then compile the word definition
177                ForthToken::Colon => {
178                    // Get the next token which has to be a command token, or its an error, this token will be the name to compile to
179                    if let Some(ForthToken::Command(word_name)) = iter.next() {
180                        // This is the list of tokens we will be compiling
181                        let mut tvc = Vec::new();
182                        let mut found_semicolon = false;
183                        // Because this is an inner loop using the outer iterator, we can't use the normal for loop syntax
184                        while let Some(token) = iter.next() {
185                            match token {
186                                ForthToken::SemiColon => {
187                                    // We have found the end of the word definition, so compile to opcodes and put into memory...
188                                    self.compile_tokens_as_word(word_name, &tvc)?;
189                                    found_semicolon = true;
190                                    break;
191                                }
192                                _ => tvc.push(token),
193                            }
194                        }
195                        if !found_semicolon {
196                            return Err(ForthError::MissingSemicolonAfterColon);
197                        }
198                    } else {
199                        // The command token has to be right after the colon token, we don't permit things like comments, we could though...
200                        return Err(ForthError::MissingCommandAfterColon);
201                    }
202                }
203                ForthToken::SemiColon => {
204                    return Err(ForthError::SemicolonBeforeColon);
205                }
206                _ => {
207                    tvi.push(token);
208                }
209            }
210        }
211
212        let mut compiled_tokens = self.compile_token_vector(&tvi)?;
213
214        // We need to return after running the interactive opcodes, so put the return in now
215        compiled_tokens.push(Opcode::RET);
216
217        Ok(compiled_tokens)
218    }
219
220    fn compile_tokens_as_word(
221        &mut self,
222        word_name: &str,
223        tokens: &[ForthToken],
224    ) -> Result<(), ForthError> {
225        // Remove anything extraneous from the end of the opcode array (*processor memory*),
226        // typically previous immediate mode tokens
227        self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
228
229        // Get the compiled assembler from the token vector
230        let mut compiled = self.compile_token_vector(tokens)?;
231        // Put the return OpCode onto the end
232        compiled.push(Opcode::RET);
233        // The current function start is the end of the last function
234        let function_start = self.last_function;
235        // Move last function pointer
236        self.last_function += compiled.len();
237        // Remember the opcodes for reflection purposes if its enabled
238        #[cfg(feature="enable_reflection")]
239        self.word_opcodes.insert(word_name.to_owned(),compiled.clone());
240        // Add the function to the opcode memory
241        self.sm.st.opcodes.append(&mut compiled);
242        // Remember where to find it...
243        self.word_addresses
244            .insert(word_name.to_owned(), function_start);
245        //        println!("Token Memory {:?}", self.sm.st.opcodes);
246        //        println!("Word Addresses {:?}", self.word_addresses);
247        //        println!("Last function {}", self.last_function);
248        #[cfg(feature="enable_reflection")]
249        self.word_definitions.insert(word_name.to_owned(),format!("{:?}",tokens));
250        Ok(())
251    }
252
253    fn compile_token_vector(
254        &mut self,
255        token_vector: &[ForthToken],
256    ) -> Result<Vec<Opcode>, ForthError> {
257        // Stack of if statements, they are deferred until the THEN Forth word
258        let mut deferred_statements = Vec::new();
259        // List of compiled processor opcodes that we are building up
260        let mut tv: Vec<Opcode> = Vec::new();
261
262        // Go through all the Forth tokens and turn them into processor Opcodes (for our StackMachine emulated processor)
263        for t in token_vector.iter() {
264            match t {
265                ForthToken::DropLineComment(_) => (),
266                ForthToken::ParenthesizedRemark(_) => (),
267                ForthToken::StringCommand(_, _) => (),
268                ForthToken::Number(n) => {
269                    // Numbers get pushed as a LDI opcode
270                    tv.push(Opcode::LDI(*n));
271                }
272                ForthToken::Command(s) => {
273                    // Remember where we are in the list of opcodes in case we hit a IF statement, LOOP etc...
274                    let current_instruction = tv.len();
275
276                    match s.as_ref() {
277                        "DO" => {
278                            let start_of_loop_code = current_instruction;
279                            // This eats the loop parameters from the number stack...
280                            tv.push(Opcode::PUSHLP);
281                            let logical_start_of_loop = tv.len();
282                            deferred_statements.push(DeferredStatement::DoLoop(
283                                DeferredDoLoopStatement::new(
284                                    start_of_loop_code,
285                                    logical_start_of_loop,
286                                ),
287                                LoopExits::new(),
288                            ));
289                        }
290                        "LOOP" => {
291                            if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
292                                deferred_statements.pop()
293                            {
294                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
295                                    - i64::try_from(current_instruction).unwrap()
296                                    // Have to jump back over the JR and the LDI
297                                    - 3;
298                                tv.push(Opcode::INCLP);
299                                tv.push(Opcode::CMPLOOP);
300                                tv.push(Opcode::LDI(jump_back));
301                                tv.push(Opcode::JRZ);
302
303                                loop_exits.fixup_loop_exits(&mut tv);
304                            } else {
305                                return Err(ForthError::InvalidSyntax(
306                                    "LOOP without proper loop start like DO".to_owned(),
307                                ));
308                            }
309                            tv.push(Opcode::DROPLP);
310                        }
311                        "+LOOP" => {
312                            if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
313                                deferred_statements.pop()
314                            {
315                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
316                                    - i64::try_from(current_instruction).unwrap()
317                                    // Have to jump back over the JR and the LDI
318                                    - 3;
319                                tv.push(Opcode::ADDLP);
320                                tv.push(Opcode::CMPLOOP);
321                                tv.push(Opcode::LDI(jump_back));
322                                tv.push(Opcode::JRZ);
323
324                                loop_exits.fixup_loop_exits(&mut tv);
325                            } else {
326                                return Err(ForthError::InvalidSyntax(
327                                    "+LOOP without proper loop start like DO".to_owned(),
328                                ));
329                            }
330                            tv.push(Opcode::DROPLP);
331                        }
332                        "LEAVE" => {
333                            let most_recent_loop_statement =
334                                deferred_statements.iter_mut().rev().find(|x| match **x {
335                                    DeferredStatement::If(_) => false,
336                                    DeferredStatement::DoLoop(_, _) => true,
337                                    DeferredStatement::BeginLoop(_, _) => true,
338                                });
339                            if let Some(deferred_statement) = most_recent_loop_statement {
340                                let loop_exits =
341                                    match deferred_statement {
342                                        DeferredStatement::DoLoop(_, loop_exits) => loop_exits,
343                                        DeferredStatement::BeginLoop(_, loop_exits) => loop_exits,
344                                        _ => return Err(ForthError::InvalidSyntax(
345                                            "LEAVE without proper loop start like DO or BEGIN(1)"
346                                                .to_owned(),
347                                        )),
348                                    };
349                                // Record the exit point
350                                loop_exits.add_exit_point(current_instruction);
351
352                                // We fix up the jumps once we get the end of loop
353                                tv.push(Opcode::LDI(0));
354                                tv.push(Opcode::JR);
355                            } else {
356                                return Err(ForthError::InvalidSyntax(
357                                    "LEAVE without proper loop start like DO or BEGIN(2)"
358                                        .to_owned(),
359                                ));
360                            }
361                        }
362                        "BEGIN" => {
363                            deferred_statements.push(DeferredStatement::BeginLoop(
364                                DeferredBeginLoopStatement::new(current_instruction),
365                                LoopExits::new(),
366                            ));
367                        }
368                        "UNTIL" => {
369                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
370                                deferred_statements.pop()
371                            {
372                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
373                                    - i64::try_from(current_instruction).unwrap()
374                                    // Have to jump back over the JR and the LDI
375                                    - 1;
376                                tv.push(Opcode::LDI(jump_back));
377                                tv.push(Opcode::JRZ);
378
379                                loop_exits.fixup_loop_exits(&mut tv);
380                            } else {
381                                return Err(ForthError::InvalidSyntax(
382                                    "UNTIL without proper loop start like BEGIN".to_owned(),
383                                ));
384                            }
385                        }
386                        "WHILE" => {
387                            if let Some(DeferredStatement::BeginLoop(_loop_def, loop_exits)) =
388                                deferred_statements.last_mut()
389                            {
390                                loop_exits.add_exit_point(current_instruction);
391                                // We fix up the jumps once we get the end of loop
392                                tv.push(Opcode::LDI(0));
393                                tv.push(Opcode::JRZ);
394                            } else {
395                                return Err(ForthError::InvalidSyntax(
396                                    "WHILE without proper loop start like BEGIN".to_owned(),
397                                ));
398                            }
399                        }
400                        "REPEAT" => {
401                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
402                                deferred_statements.pop()
403                            {
404                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
405                                    - i64::try_from(current_instruction).unwrap()
406                                    // Have to jump back over the JR and the LDI
407                                    - 1;
408                                tv.push(Opcode::LDI(jump_back));
409                                tv.push(Opcode::JR);
410
411                                loop_exits.fixup_loop_exits(&mut tv);
412                            } else {
413                                return Err(ForthError::InvalidSyntax(
414                                    "AGAIN without proper loop start like BEGIN".to_owned(),
415                                ));
416                            }
417                        }
418                        "AGAIN" => {
419                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
420                                deferred_statements.pop()
421                            {
422                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
423                                    - i64::try_from(current_instruction).unwrap()
424                                    // Have to jump back over the JR and the LDI
425                                    - 1;
426                                tv.push(Opcode::LDI(jump_back));
427                                tv.push(Opcode::JR);
428
429                                loop_exits.fixup_loop_exits(&mut tv);
430                            } else {
431                                return Err(ForthError::InvalidSyntax(
432                                    "AGAIN without proper loop start like BEGIN".to_owned(),
433                                ));
434                            }
435                        }
436                        // FLAG 0 = Skip stuff inside IF, !0 = Run stuff inside IF
437                        "IF" => {
438                            deferred_statements.push(DeferredStatement::If(
439                                DeferredIfStatement::new(current_instruction),
440                            ));
441                            //println!("(IF)Deferred If Stack {:?}", deferred_if_statements);
442                            tv.push(Opcode::LDI(0));
443                            tv.push(Opcode::JRZ);
444                        }
445                        "ELSE" => {
446                            if let Some(DeferredStatement::If(x)) = deferred_statements.last_mut() {
447                                x.else_location = Some(current_instruction);
448                                //println!("(ELSE) Deferred If Stack {:?}", deferred_if_statements);
449                                tv.push(Opcode::LDI(0));
450                                tv.push(Opcode::JR);
451                            } else {
452                                return Err(ForthError::InvalidSyntax(
453                                    "ELSE without IF".to_owned(),
454                                ));
455                            }
456                        }
457                        "THEN" => {
458                            // This only works if there isn't an ELSE statement, it needs to jump differently if there is an ELSE statement
459                            //println!("(THEN) Deferred If Stack {:?}", deferred_if_statements);
460                            if let Some(DeferredStatement::If(x)) = deferred_statements.pop() {
461                                //println!("(if let Some(x)) Deferred If Stack {:?}", x);
462                                let if_jump_location = x.if_location;
463                                let if_jump_offset = match x.else_location {
464                                    None => (current_instruction as u64
465                                        - (x.if_location + 1) as u64)
466                                        .try_into()
467                                        .unwrap(),
468                                    Some(el) => (current_instruction as u64 - el as u64 + 1)
469                                        .try_into()
470                                        .unwrap(),
471                                };
472                                let (else_jump_location, else_jump_offset): (
473                                    Option<usize>,
474                                    Option<i64>,
475                                ) = match x.else_location {
476                                    Some(x) => (
477                                        Some(x),
478                                        Some(
479                                            i64::try_from(
480                                                current_instruction as u64 - (x + 1) as u64,
481                                            )
482                                            .unwrap(),
483                                        ),
484                                    ),
485                                    None => (None, None),
486                                };
487                                //println!("if structure: {:?}", x);
488                                tv[if_jump_location] = Opcode::LDI(if_jump_offset);
489                                if let (Some(location), Some(offset)) =
490                                    (else_jump_location, else_jump_offset)
491                                {
492                                    tv[location] = Opcode::LDI(offset);
493                                }
494                            } else {
495                                return Err(ForthError::InvalidSyntax(
496                                    "THEN without IF".to_owned(),
497                                ));
498                            }
499                        }
500                        _ => {
501                            if let Some(offset) = self.word_addresses.get(*s) {
502                                tv.push(Opcode::LDI(*offset as i64));
503                                tv.push(Opcode::CALL);
504                            } else if let Some(ol) = self.intrinsic_words.get::<str>(s) {
505                                tv.append(&mut ol.clone());
506                            } else {
507                                return Err(ForthError::UnknownToken(s.to_string()));
508                            }
509                        }
510                    }
511                }
512                ForthToken::Colon => {
513                    panic!("Colon should never reach this function");
514                }
515                ForthToken::SemiColon => {
516                    panic!("SemiColon should never reach this function");
517                }
518            }
519        }
520
521        Ok(tv)
522    }
523
524    fn execute_tokens(
525        &mut self,
526        token_source: &ForthTokenizer,
527        gas_limit: GasLimit,
528    ) -> Result<(), ForthError> {
529        let mut ol = self.compile_tokens_compile_and_remove_word_definitions(token_source)?;
530        //println!("Compiled Opcodes: {:?}", ol);
531        self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
532        self.sm.st.opcodes.append(&mut ol);
533        self.sm.execute(self.last_function, gas_limit)?;
534        //println!("Total opcodes defined: {}", self.sm.st.opcodes.len());
535        //println!("Total opcodes executed: {}", self.sm.st.gas_used());
536
537        Ok(())
538    }
539
540    pub fn execute_string(&mut self, s: &str, gas_limit: GasLimit) -> Result<(), ForthError> {
541        let tokenizer = ForthTokenizer::new(&s);
542        self.execute_tokens(&tokenizer, gas_limit)?;
543        Ok(())
544    }
545}