Skip to main content

endbasic_core/compiler/
mod.rs

1// EndBASIC
2// Copyright 2026 Julio Merino
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Compiler for the EndBASIC language into bytecode.
18
19use crate::ast::{ExprType, VarRef};
20use crate::bytecode::{InvalidExitCodeError, RegisterScope};
21use crate::callable::CallableMetadata;
22use crate::image::Image;
23use crate::reader::LineCol;
24use crate::{Callable, parser};
25use std::collections::HashMap;
26use std::io;
27use std::rc::Rc;
28
29mod args;
30
31mod codegen;
32
33mod exprs;
34
35mod ids;
36
37mod syms;
38pub use syms::SymbolKey;
39use syms::{GlobalSymtable, LocalSymtable, LocalSymtableSnapshot};
40
41mod top;
42use top::{Context, prepare_globals};
43pub use top::{GlobalDef, GlobalDefKind, only_metadata};
44
45/// Errors that can occur during compilation.
46#[derive(Debug, thiserror::Error)]
47pub enum Error {
48    /// Attempt to redefine an already-defined symbol.
49    #[error("{0}: Cannot redefine {1}")]
50    AlreadyDefined(LineCol, VarRef),
51
52    /// Array name used without subscripts (as a scalar).
53    #[error("{0}: {1} is an array and requires subscripts")]
54    ArrayUsedAsScalar(LineCol, VarRef),
55
56    /// Type mismatch in a binary operation.
57    #[error("{0}: Cannot {1} {2} and {3}")]
58    BinaryOpType(LineCol, &'static str, ExprType, ExprType),
59
60    /// Callable invoked with incorrect syntax.
61    // TODO(jmmv): It'd be nice if we could carry an Rc<CallableMetadata> here to avoid copying
62    // but... because of async in consumers, we would need an `Arc` instead just for this single
63    // error type.   Given that performance during error propagation is not important, the copy
64    // is just fine.  If we ever have to pollute everything with `Arc`s in the future, then we
65    // could do this.
66    #[error("{0}: {} expected {}", .1.name(), .1.syntax())]
67    CallableSyntax(LineCol, CallableMetadata),
68
69    /// Attempt to nest FUNCTION or SUB definitions.
70    #[error("{0}: Cannot nest FUNCTION or SUB declarations nor definitions")]
71    CannotNestUserCallables(LineCol),
72
73    /// Attempt to redefine an already-defined label.
74    #[error("{0}: Duplicate label {1}")]
75    DuplicateLabel(LineCol, String),
76
77    /// Type annotation in a reference doesn't match the variable's type.
78    #[error("{0}: Incompatible type annotation in {1} reference")]
79    IncompatibleTypeAnnotationInReference(LineCol, VarRef),
80
81    /// Type mismatch in an assignment.
82    #[error("{0}: Cannot assign value of type {1} to variable of type {2}")]
83    IncompatibleTypesInAssignment(LineCol, ExprType, ExprType),
84
85    /// `END` code is out of range.
86    #[error("{0}: {1}")]
87    InvalidEndCode(LineCol, String),
88
89    /// I/O error while reading the source.
90    #[error("{0}: I/O error during compilation: {1}")]
91    Io(LineCol, io::Error),
92
93    /// Attempt to call something that is not an array nor a function.
94    #[error("{0}: {1} is not an array nor a function")]
95    NotAFunction(LineCol, VarRef),
96
97    /// `EXIT` statement found outside its expected block.
98    #[error("{0}: EXIT {1} outside of {1}")]
99    MisplacedExit(LineCol, &'static str),
100
101    /// Attempt to index something that is not an array.
102    #[error("{0}: {1} is not an array")]
103    NotAnArray(LineCol, VarRef),
104
105    /// Expected a numeric type but got something else.
106    #[error("{0}: {1} is not a number")]
107    NotANumber(LineCol, ExprType),
108
109    /// Constants pool has been exhausted.
110    #[error("{0}: Out of constants")]
111    OutOfConstants(LineCol),
112
113    /// Register allocation has been exhausted.
114    #[error("{0}: Out of {1} registers")]
115    OutOfRegisters(LineCol, RegisterScope),
116
117    /// Upcall table has been exhausted.
118    #[error("{0}: Out of upcalls")]
119    OutOfUpcalls(LineCol),
120
121    /// Syntax error from the parser.
122    #[error("{0}: {1}")]
123    Parse(LineCol, String),
124
125    /// Jump or call target is too far away.
126    #[error("{0}: Jump/call target is {1} which is too far")]
127    TargetTooFar(LineCol, usize),
128
129    /// An array has too many dimensions.
130    #[error("{0}: Array cannot have {1} dimensions")]
131    TooManyArrayDimensions(LineCol, usize),
132
133    /// Type mismatch where a specific type was expected.
134    #[error("{0}: Expected {2} but found {1}")]
135    TypeMismatch(LineCol, ExprType, ExprType),
136
137    /// Reference to an undefined symbol.
138    #[error("{0}: Undefined symbol {1}")]
139    UndefinedSymbol(LineCol, VarRef),
140
141    /// Reference to an unknown label.
142    #[error("{0}: Unknown label {1}")]
143    UnknownLabel(LineCol, String),
144
145    /// Wrong number of subscripts for an array access.
146    #[error("{0}: Array requires {1} subscripts but got {2}")]
147    WrongNumberOfSubscripts(LineCol, usize, usize),
148}
149
150impl Error {
151    /// Splits a textual error message into its source position and message.
152    ///
153    /// TODO(jmmv): This is a hack to support the current needs of std and allow migrating its
154    /// code to this new implementation.  Once migrated, revisit this.
155    fn split_display_message(&self) -> (LineCol, String) {
156        let display = self.to_string();
157        let mut parts = display.splitn(3, ':');
158        let line = parts
159            .next()
160            .expect("CompilerError display always has line")
161            .parse()
162            .expect("CompilerError line is always numeric");
163        let col = parts
164            .next()
165            .expect("CompilerError display always has column")
166            .parse()
167            .expect("CompilerError column is always numeric");
168        let message =
169            parts.next().expect("CompilerError display always has message").trim_start().to_owned();
170        (LineCol { line, col }, message)
171    }
172
173    /// Returns the source position where this compilation error happened.
174    pub fn pos(&self) -> LineCol {
175        self.split_display_message().0
176    }
177
178    /// Returns this error's message without the source position prefix.
179    pub fn message_without_pos(&self) -> String {
180        self.split_display_message().1
181    }
182
183    /// Annotates an invalid `END` exit code error with a source position.
184    fn from_bytecode_invalid_exit_code(value: InvalidExitCodeError, pos: LineCol) -> Self {
185        Self::InvalidEndCode(pos, value.to_string())
186    }
187
188    /// Annotates an error from the symbol table with the position it arised from.
189    fn from_syms(value: syms::Error, pos: LineCol) -> Self {
190        match value {
191            syms::Error::AlreadyDefined(vref) => Error::AlreadyDefined(pos, vref),
192            syms::Error::IncompatibleTypeAnnotationInReference(vref) => {
193                Error::IncompatibleTypeAnnotationInReference(pos, vref)
194            }
195            syms::Error::OutOfRegisters(scope) => Error::OutOfRegisters(pos, scope),
196            syms::Error::UndefinedSymbol(vref, _scope) => Error::UndefinedSymbol(pos, vref),
197        }
198    }
199}
200
201impl From<parser::Error> for Error {
202    fn from(value: parser::Error) -> Self {
203        match value {
204            parser::Error::Bad(pos, message) => Self::Parse(pos, message),
205            parser::Error::Io(pos, e) => Self::Io(pos, e),
206        }
207    }
208}
209
210/// Result type for compilation operations.
211pub type Result<T> = std::result::Result<T, Error>;
212
213/// Compiler context.
214///
215/// This exists to support incremental compilation by keeping state and appending code to the
216/// image being built, which is useful in REPL scenarios.
217pub struct Compiler {
218    context: Context,
219    symtable: GlobalSymtable,
220    program_scope: LocalSymtableSnapshot,
221}
222
223impl Compiler {
224    /// Creates a new compiler instance.
225    ///
226    /// `global_defs` provides pre-defined global variables visible to the compiled program.
227    ///
228    /// `upcalls` contains the metadata of all built-in callables that the compiled code can use.
229    pub fn new(
230        upcalls: &HashMap<SymbolKey, Rc<dyn Callable>>,
231        global_defs: &[GlobalDef],
232    ) -> Result<Self> {
233        let mut upcalls_metadata = HashMap::with_capacity(upcalls.len());
234        for (k, v) in upcalls.iter() {
235            upcalls_metadata.insert(k.clone(), v.metadata());
236        }
237
238        let mut context = Context::default();
239
240        let mut symtable = GlobalSymtable::new(upcalls_metadata);
241        prepare_globals(&mut context, &mut symtable, global_defs)?;
242
243        Ok(Self { context, symtable, program_scope: LocalSymtableSnapshot::default() })
244    }
245
246    /// Compiles a chunk of code.
247    pub fn compile(mut self, input: &mut dyn io::Read) -> Result<Image> {
248        let mut image = Image::default();
249        self.compile_more(&mut image, input)?;
250        Ok(image)
251    }
252
253    /// Compiles a chunk of code and appends it to `image`.
254    pub fn compile_more(&mut self, image: &mut Image, input: &mut dyn io::Read) -> Result<()> {
255        let mut new_context = self.context.clone();
256        let mut new_symtable = self.symtable.clone();
257        let program_scope = LocalSymtable::restore(&mut new_symtable, self.program_scope.clone());
258        let (delta, snapshot) = top::compile(input, image, &mut new_context, program_scope)?;
259        image.append(delta);
260        self.context = new_context;
261        self.symtable = new_symtable;
262        self.program_scope = snapshot;
263        Ok(())
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain message yields its position and its text without the position prefix.
    #[test]
    fn test_error_pos_and_message_without_pos() {
        let error = Error::Parse(LineCol { line: 3, col: 15 }, String::from("Invalid token"));

        let expected_pos = LineCol { line: 3, col: 15 };
        assert_eq!(expected_pos, error.pos());
        assert_eq!("Invalid token", error.message_without_pos());
    }

    /// Colons inside the message must survive the removal of the position prefix.
    #[test]
    fn test_error_message_without_pos_preserves_colons() {
        let text = "Expected INTEGER: got STRING";
        let error = Error::Parse(LineCol { line: 4, col: 9 }, String::from(text));

        let expected_pos = LineCol { line: 4, col: 9 };
        assert_eq!(expected_pos, error.pos());
        assert_eq!(text, error.message_without_pos());
    }
}