rust_forth_compiler/
lib.rs

1use rust_forth_tokenizer::{ForthToken, ForthTokenizer};
2pub use rust_simple_stack_processor::GasLimit;
3use rust_simple_stack_processor::{Opcode, StackMachine};
4use std::collections::HashMap;
5use std::convert::{TryFrom, TryInto};
6
7mod error;
8pub use error::ForthError;
9
10#[cfg(test)]
11mod tests;
12
13pub struct ForthCompiler {
14    // This is the Stack Machine processor that runs the compiled Forth instructions
15    pub sm: StackMachine,
16    // These are the words that we know how to work with regardless, things like DROP, MUL, etc
17    intrinsic_words: HashMap<&'static str, Vec<Opcode>>,
18    // This is where we remember where we put compiled words in the *memory* of the StackMachine
19    // We run the interactive opcodes after these compiled words, and then erase the memory after
20    // the compiled words again for the next batch of interactive opcodes.
21    #[cfg(feature = "enable_reflection")]
22    pub word_addresses: HashMap<String, usize>,
23    #[cfg(not(feature = "enable_reflection"))]
24    word_addresses: HashMap<String, usize>,
25    // This is the location in memory that points to the location after the last compiled opcode
26    // So its an ideal place to run interactive compiled opcodes
27    last_function: usize,
28    // Remember the definition for words
29    #[cfg(feature = "enable_reflection")]
30    pub word_definitions: HashMap<String, String>,
31    // Remember the opcodes for words
32    #[cfg(feature = "enable_reflection")]
33    pub word_opcodes: HashMap<String, Vec<Opcode>>,
34}
35
36impl Default for ForthCompiler {
37    fn default() -> ForthCompiler {
38        ForthCompiler {
39            sm: StackMachine::default(),
40            intrinsic_words: HashMap::from([
41                ("SWAP", vec![Opcode::SWAP]),
42                ("NOT", vec![Opcode::NOT]),
43                ("ADD", vec![Opcode::ADD]),
44                ("SUB", vec![Opcode::SUB]),
45                ("MUL", vec![Opcode::MUL]),
46                ("DIV", vec![Opcode::DIV]),
47                ("DUP", vec![Opcode::DUP]),
48                ("2DUP", vec![Opcode::DUP2]),
49                ("TRAP", vec![Opcode::TRAP]),
50                ("DROP", vec![Opcode::DROP]),
51                ("2DROP", vec![Opcode::DROP, Opcode::DROP]),
52                ("2OVER", vec![Opcode::OVER2]),
53                ("2SWAP", vec![Opcode::SWAP2]),
54                ("1+", vec![Opcode::LDI(1), Opcode::ADD]),
55                ("1-", vec![Opcode::LDI(-1), Opcode::ADD]),
56                ("2+", vec![Opcode::LDI(2), Opcode::ADD]),
57                ("2-", vec![Opcode::LDI(-2), Opcode::ADD]),
58                ("2*", vec![Opcode::LDI(2), Opcode::MUL]),
59                ("2/", vec![Opcode::LDI(2), Opcode::DIV]),
60                ("I", vec![Opcode::GETLP]),
61                ("J", vec![Opcode::GETLP2]),
62                ("AND", vec![Opcode::AND]),
63                ("=", vec![Opcode::SUB, Opcode::CMPZ]),
64                ("<>", vec![Opcode::SUB, Opcode::CMPNZ]),
65            ]),
66            word_addresses: HashMap::new(),
67            last_function: 0,
68            #[cfg(feature = "enable_reflection")]
69            word_definitions: HashMap::new(),
70            #[cfg(feature = "enable_reflection")]
71            word_opcodes: HashMap::new(),
72        }
73    }
74}
75
/// Bookkeeping for a Forth IF statement whose jump targets are not known yet.
#[derive(Debug)]
struct DeferredIfStatement {
    /// Opcode index recorded when the IF was encountered.
    if_location: usize,
    /// Opcode index recorded when the matching ELSE was encountered, if there was one.
    else_location: Option<usize>,
}

impl DeferredIfStatement {
    /// Starts tracking an IF at `if_location`; no ELSE has been seen at this point.
    pub fn new(if_location: usize) -> Self {
        let else_location = None;
        DeferredIfStatement {
            if_location,
            else_location,
        }
    }
}
91
/// Bookkeeping for a Forth DO loop that has not reached its LOOP / +LOOP yet.
#[derive(Debug)]
struct DeferredDoLoopStatement {
    /// Opcode index where the loop's setup code begins; only used for debugging the
    /// internal code.
    _prelude_start: usize,
    /// Opcode index the end of the loop jumps back to.
    logical_start: usize,
}

impl DeferredDoLoopStatement {
    /// Records where the loop prelude and the repeated part of the loop begin.
    pub fn new(_prelude_start: usize, logical_start: usize) -> Self {
        DeferredDoLoopStatement {
            _prelude_start,
            logical_start,
        }
    }
}
107
108#[derive(Debug)]
109struct LoopExits {
110    loop_exit_locations: Vec<usize>,
111}
112
113impl LoopExits {
114    pub fn new() -> Self {
115        Self {
116            loop_exit_locations: Vec::new(),
117        }
118    }
119
120    pub fn add_exit_point(&mut self, loop_exit_location: usize) {
121        self.loop_exit_locations.push(loop_exit_location);
122    }
123
124    fn fixup_loop_exits(&self, opcode_vector: &mut Vec<Opcode>) {
125        let loop_exit_point = opcode_vector.len();
126        for leave_point in self.loop_exit_locations.iter() {
127            let jump_forward =
128                i64::try_from(loop_exit_point).unwrap() - i64::try_from(*leave_point).unwrap() - 1;
129            opcode_vector[*leave_point] = Opcode::LDI(jump_forward);
130        }
131    }
132}
133
/// Bookkeeping for a Forth BEGIN loop that has not reached its closing word yet.
#[derive(Debug)]
struct DeferredBeginLoopStatement {
    /// Opcode index the end of the loop jumps back to.
    logical_start: usize,
}

impl DeferredBeginLoopStatement {
    /// Records the opcode index at which the loop body begins.
    pub fn new(logical_start: usize) -> Self {
        DeferredBeginLoopStatement { logical_start }
    }
}
144
145enum DeferredStatement {
146    If(DeferredIfStatement),
147    DoLoop(DeferredDoLoopStatement, LoopExits),
148    BeginLoop(DeferredBeginLoopStatement, LoopExits),
149}
150
151impl ForthCompiler {
152    fn compile_tokens_compile_and_remove_word_definitions(
153        &mut self,
154        token_source: &ForthTokenizer,
155    ) -> Result<Vec<Opcode>, ForthError> {
156        // This is the interactive compiled token list
157        let mut tvi = Vec::new();
158
159        // Because we consume tokens in an inner loop, we can't use the normal for loop to read the tokens
160        let mut iter = token_source.into_iter();
161        while let Some(token) = iter.next() {
162            match token {
163                // If a colon token, then compile the word definition
164                ForthToken::Colon => {
165                    // Get the next token which has to be a command token, or its an error, this token will be the name to compile to
166                    if let Some(ForthToken::Command(word_name)) = iter.next() {
167                        // This is the list of tokens we will be compiling
168                        let mut tvc = Vec::new();
169                        let mut found_semicolon = false;
170                        // Because this is an inner loop using the outer iterator, we can't use the normal for loop syntax
171                        while let Some(token) = iter.next() {
172                            match token {
173                                ForthToken::SemiColon => {
174                                    // We have found the end of the word definition, so compile to opcodes and put into memory...
175                                    self.compile_tokens_as_word(word_name, &tvc)?;
176                                    found_semicolon = true;
177                                    break;
178                                }
179                                _ => tvc.push(token),
180                            }
181                        }
182                        if !found_semicolon {
183                            return Err(ForthError::MissingSemicolonAfterColon);
184                        }
185                    } else {
186                        // The command token has to be right after the colon token, we don't permit things like comments, we could though...
187                        return Err(ForthError::MissingCommandAfterColon);
188                    }
189                }
190                ForthToken::SemiColon => {
191                    return Err(ForthError::SemicolonBeforeColon);
192                }
193                _ => {
194                    tvi.push(token);
195                }
196            }
197        }
198
199        let mut compiled_tokens = self.compile_token_vector(&tvi)?;
200
201        // We need to return after running the interactive opcodes, so put the return in now
202        compiled_tokens.push(Opcode::RET);
203
204        Ok(compiled_tokens)
205    }
206
207    fn compile_tokens_as_word(
208        &mut self,
209        word_name: &str,
210        tokens: &[ForthToken],
211    ) -> Result<(), ForthError> {
212        // Remove anything extraneous from the end of the opcode array (*processor memory*),
213        // typically previous immediate mode tokens
214        self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
215
216        // Get the compiled assembler from the token vector
217        let mut compiled = self.compile_token_vector(tokens)?;
218        // Put the return OpCode onto the end
219        compiled.push(Opcode::RET);
220        // The current function start is the end of the last function
221        let function_start = self.last_function;
222        // Move last function pointer
223        self.last_function += compiled.len();
224        // Remember the opcodes for reflection purposes if its enabled
225        #[cfg(feature = "enable_reflection")]
226        self.word_opcodes
227            .insert(word_name.to_owned(), compiled.clone());
228        // Add the function to the opcode memory
229        self.sm.st.opcodes.append(&mut compiled);
230        // Remember where to find it...
231        self.word_addresses
232            .insert(word_name.to_owned(), function_start);
233        //        println!("Token Memory {:?}", self.sm.st.opcodes);
234        //        println!("Word Addresses {:?}", self.word_addresses);
235        //        println!("Last function {}", self.last_function);
236        #[cfg(feature = "enable_reflection")]
237        self.word_definitions
238            .insert(word_name.to_owned(), format!("{:?}", tokens));
239        Ok(())
240    }
241
242    fn compile_token_vector(
243        &mut self,
244        token_vector: &[ForthToken],
245    ) -> Result<Vec<Opcode>, ForthError> {
246        // Stack of if statements, they are deferred until the THEN Forth word
247        let mut deferred_statements = Vec::new();
248        // List of compiled processor opcodes that we are building up
249        let mut tv: Vec<Opcode> = Vec::new();
250
251        // Go through all the Forth tokens and turn them into processor Opcodes (for our StackMachine emulated processor)
252        for t in token_vector.iter() {
253            match t {
254                ForthToken::DropLineComment(_) => (),
255                ForthToken::ParenthesizedRemark(_) => (),
256                ForthToken::StringCommand(_, _) => (),
257                ForthToken::Number(n) => {
258                    // Numbers get pushed as a LDI opcode
259                    tv.push(Opcode::LDI(*n));
260                }
261                ForthToken::Command(s) => {
262                    // Remember where we are in the list of opcodes in case we hit a IF statement, LOOP etc...
263                    let current_instruction = tv.len();
264
265                    match s.as_ref() {
266                        "DO" => {
267                            let start_of_loop_code = current_instruction;
268                            // This eats the loop parameters from the number stack...
269                            tv.push(Opcode::PUSHLP);
270                            let logical_start_of_loop = tv.len();
271                            deferred_statements.push(DeferredStatement::DoLoop(
272                                DeferredDoLoopStatement::new(
273                                    start_of_loop_code,
274                                    logical_start_of_loop,
275                                ),
276                                LoopExits::new(),
277                            ));
278                        }
279                        "LOOP" => {
280                            if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
281                                deferred_statements.pop()
282                            {
283                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
284                                    - i64::try_from(current_instruction).unwrap()
285                                    // Have to jump back over the JR and the LDI
286                                    - 3;
287                                tv.push(Opcode::INCLP);
288                                tv.push(Opcode::CMPLOOP);
289                                tv.push(Opcode::LDI(jump_back));
290                                tv.push(Opcode::JRZ);
291
292                                loop_exits.fixup_loop_exits(&mut tv);
293                            } else {
294                                return Err(ForthError::InvalidSyntax(
295                                    "LOOP without proper loop start like DO".to_owned(),
296                                ));
297                            }
298                            tv.push(Opcode::DROPLP);
299                        }
300                        "+LOOP" => {
301                            if let Some(DeferredStatement::DoLoop(loop_def, loop_exits)) =
302                                deferred_statements.pop()
303                            {
304                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
305                                    - i64::try_from(current_instruction).unwrap()
306                                    // Have to jump back over the JR and the LDI
307                                    - 3;
308                                tv.push(Opcode::ADDLP);
309                                tv.push(Opcode::CMPLOOP);
310                                tv.push(Opcode::LDI(jump_back));
311                                tv.push(Opcode::JRZ);
312
313                                loop_exits.fixup_loop_exits(&mut tv);
314                            } else {
315                                return Err(ForthError::InvalidSyntax(
316                                    "+LOOP without proper loop start like DO".to_owned(),
317                                ));
318                            }
319                            tv.push(Opcode::DROPLP);
320                        }
321                        "LEAVE" => {
322                            let most_recent_loop_statement =
323                                deferred_statements.iter_mut().rev().find(|x| match **x {
324                                    DeferredStatement::If(_) => false,
325                                    DeferredStatement::DoLoop(_, _) => true,
326                                    DeferredStatement::BeginLoop(_, _) => true,
327                                });
328                            if let Some(deferred_statement) = most_recent_loop_statement {
329                                let loop_exits =
330                                    match deferred_statement {
331                                        DeferredStatement::DoLoop(_, loop_exits) => loop_exits,
332                                        DeferredStatement::BeginLoop(_, loop_exits) => loop_exits,
333                                        _ => return Err(ForthError::InvalidSyntax(
334                                            "LEAVE without proper loop start like DO or BEGIN(1)"
335                                                .to_owned(),
336                                        )),
337                                    };
338                                // Record the exit point
339                                loop_exits.add_exit_point(current_instruction);
340
341                                // We fix up the jumps once we get the end of loop
342                                tv.push(Opcode::LDI(0));
343                                tv.push(Opcode::JR);
344                            } else {
345                                return Err(ForthError::InvalidSyntax(
346                                    "LEAVE without proper loop start like DO or BEGIN(2)"
347                                        .to_owned(),
348                                ));
349                            }
350                        }
351                        "BEGIN" => {
352                            deferred_statements.push(DeferredStatement::BeginLoop(
353                                DeferredBeginLoopStatement::new(current_instruction),
354                                LoopExits::new(),
355                            ));
356                        }
357                        "UNTIL" => {
358                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
359                                deferred_statements.pop()
360                            {
361                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
362                                    - i64::try_from(current_instruction).unwrap()
363                                    // Have to jump back over the JR and the LDI
364                                    - 1;
365                                tv.push(Opcode::LDI(jump_back));
366                                tv.push(Opcode::JRZ);
367
368                                loop_exits.fixup_loop_exits(&mut tv);
369                            } else {
370                                return Err(ForthError::InvalidSyntax(
371                                    "UNTIL without proper loop start like BEGIN".to_owned(),
372                                ));
373                            }
374                        }
375                        "WHILE" => {
376                            if let Some(DeferredStatement::BeginLoop(_loop_def, loop_exits)) =
377                                deferred_statements.last_mut()
378                            {
379                                loop_exits.add_exit_point(current_instruction);
380                                // We fix up the jumps once we get the end of loop
381                                tv.push(Opcode::LDI(0));
382                                tv.push(Opcode::JRZ);
383                            } else {
384                                return Err(ForthError::InvalidSyntax(
385                                    "WHILE without proper loop start like BEGIN".to_owned(),
386                                ));
387                            }
388                        }
389                        "REPEAT" => {
390                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
391                                deferred_statements.pop()
392                            {
393                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
394                                    - i64::try_from(current_instruction).unwrap()
395                                    // Have to jump back over the JR and the LDI
396                                    - 1;
397                                tv.push(Opcode::LDI(jump_back));
398                                tv.push(Opcode::JR);
399
400                                loop_exits.fixup_loop_exits(&mut tv);
401                            } else {
402                                return Err(ForthError::InvalidSyntax(
403                                    "AGAIN without proper loop start like BEGIN".to_owned(),
404                                ));
405                            }
406                        }
407                        "AGAIN" => {
408                            if let Some(DeferredStatement::BeginLoop(loop_def, loop_exits)) =
409                                deferred_statements.pop()
410                            {
411                                let jump_back = i64::try_from(loop_def.logical_start).unwrap()
412                                    - i64::try_from(current_instruction).unwrap()
413                                    // Have to jump back over the JR and the LDI
414                                    - 1;
415                                tv.push(Opcode::LDI(jump_back));
416                                tv.push(Opcode::JR);
417
418                                loop_exits.fixup_loop_exits(&mut tv);
419                            } else {
420                                return Err(ForthError::InvalidSyntax(
421                                    "AGAIN without proper loop start like BEGIN".to_owned(),
422                                ));
423                            }
424                        }
425                        // FLAG 0 = Skip stuff inside IF, !0 = Run stuff inside IF
426                        "IF" => {
427                            deferred_statements.push(DeferredStatement::If(
428                                DeferredIfStatement::new(current_instruction),
429                            ));
430                            //println!("(IF)Deferred If Stack {:?}", deferred_if_statements);
431                            tv.push(Opcode::LDI(0));
432                            tv.push(Opcode::JRZ);
433                        }
434                        "ELSE" => {
435                            if let Some(DeferredStatement::If(x)) = deferred_statements.last_mut() {
436                                x.else_location = Some(current_instruction);
437                                //println!("(ELSE) Deferred If Stack {:?}", deferred_if_statements);
438                                tv.push(Opcode::LDI(0));
439                                tv.push(Opcode::JR);
440                            } else {
441                                return Err(ForthError::InvalidSyntax(
442                                    "ELSE without IF".to_owned(),
443                                ));
444                            }
445                        }
446                        "THEN" => {
447                            // This only works if there isn't an ELSE statement, it needs to jump differently if there is an ELSE statement
448                            //println!("(THEN) Deferred If Stack {:?}", deferred_if_statements);
449                            if let Some(DeferredStatement::If(x)) = deferred_statements.pop() {
450                                //println!("(if let Some(x)) Deferred If Stack {:?}", x);
451                                let if_jump_location = x.if_location;
452                                let if_jump_offset = match x.else_location {
453                                    None => (current_instruction as u64
454                                        - (x.if_location + 1) as u64)
455                                        .try_into()
456                                        .unwrap(),
457                                    Some(el) => (current_instruction as u64 - el as u64 + 1)
458                                        .try_into()
459                                        .unwrap(),
460                                };
461                                let (else_jump_location, else_jump_offset): (
462                                    Option<usize>,
463                                    Option<i64>,
464                                ) = match x.else_location {
465                                    Some(x) => (
466                                        Some(x),
467                                        Some(
468                                            i64::try_from(
469                                                current_instruction as u64 - (x + 1) as u64,
470                                            )
471                                            .unwrap(),
472                                        ),
473                                    ),
474                                    None => (None, None),
475                                };
476                                //println!("if structure: {:?}", x);
477                                tv[if_jump_location] = Opcode::LDI(if_jump_offset);
478                                if let (Some(location), Some(offset)) =
479                                    (else_jump_location, else_jump_offset)
480                                {
481                                    tv[location] = Opcode::LDI(offset);
482                                }
483                            } else {
484                                return Err(ForthError::InvalidSyntax(
485                                    "THEN without IF".to_owned(),
486                                ));
487                            }
488                        }
489                        _ => {
490                            if let Some(&offset) = self.word_addresses.get(*s) {
491                                tv.push(Opcode::LDI(offset as i64));
492                                tv.push(Opcode::CALL);
493                            } else if let Some(ol) = self.intrinsic_words.get(*s) {
494                                tv.extend_from_slice(ol);
495                            } else {
496                                return Err(ForthError::UnknownToken((*s).to_string()));
497                            }
498                        }
499                    }
500                }
501                ForthToken::Colon => {
502                    unreachable!("Colon should never reach this function");
503                }
504                ForthToken::SemiColon => {
505                    unreachable!("SemiColon should never reach this function");
506                }
507            }
508        }
509
510        Ok(tv)
511    }
512
513    fn execute_tokens(
514        &mut self,
515        token_source: &ForthTokenizer,
516        gas_limit: GasLimit,
517    ) -> Result<(), ForthError> {
518        let mut ol = self.compile_tokens_compile_and_remove_word_definitions(token_source)?;
519        //println!("Compiled Opcodes: {:?}", ol);
520        self.sm.st.opcodes.resize(self.last_function, Opcode::NOP);
521        self.sm.st.opcodes.append(&mut ol);
522        self.sm.execute(self.last_function, gas_limit)?;
523        //println!("Total opcodes defined: {}", self.sm.st.opcodes.len());
524        //println!("Total opcodes executed: {}", self.sm.st.gas_used());
525
526        Ok(())
527    }
528
529    pub fn execute_string(&mut self, s: &str, gas_limit: GasLimit) -> Result<(), ForthError> {
530        let tokenizer = ForthTokenizer::new(s);
531        self.execute_tokens(&tokenizer, gas_limit)
532    }
533}