endbasic-core 0.13.0

The EndBASIC programming language - core
Documentation
// EndBASIC
// Copyright 2026 Julio Merino
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

//! Compiler for the EndBASIC language into bytecode.

use crate::ast::{ExprType, VarRef};
use crate::bytecode::{InvalidExitCodeError, RegisterScope};
use crate::callable::CallableMetadata;
use crate::image::Image;
use crate::reader::LineCol;
use crate::{Callable, parser};
use std::collections::HashMap;
use std::io;
use std::rc::Rc;

mod args;

mod codegen;

mod exprs;

mod ids;

mod syms;
pub use syms::SymbolKey;
use syms::{GlobalSymtable, LocalSymtable, LocalSymtableSnapshot};

mod top;
use top::{Context, prepare_globals};
pub use top::{GlobalDef, GlobalDefKind, only_metadata};

/// Everything that can go wrong while compiling EndBASIC source code.
///
/// Each variant carries, as its first field, the source position the problem was detected at,
/// and renders as that position followed by a colon and a human-readable message.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// A symbol that already exists was defined again.
    #[error("{0}: Cannot redefine {1}")]
    AlreadyDefined(LineCol, VarRef),

    /// An array was referenced without subscripts, as if it were a scalar.
    #[error("{0}: {1} is an array and requires subscripts")]
    ArrayUsedAsScalar(LineCol, VarRef),

    /// A binary operation was applied to operands of incompatible types.
    ///
    /// The fields are the name of the operation followed by the types of the two operands.
    #[error("{0}: Cannot {1} {2} and {3}")]
    BinaryOpType(LineCol, &'static str, ExprType, ExprType),

    /// A callable was invoked with arguments that do not match its syntax.
    // TODO(jmmv): It'd be nice if we could carry an Rc<CallableMetadata> here to avoid copying
    // but... because of async in consumers, we would need an `Arc` instead just for this single
    // error type.   Given that performance during error propagation is not important, the copy
    // is just fine.  If we ever have to pollute everything with `Arc`s in the future, then we
    // could do this.
    #[error("{0}: {} expected {}", .1.name(), .1.syntax())]
    CallableSyntax(LineCol, CallableMetadata),

    /// A FUNCTION or SUB was declared or defined inside another one.
    #[error("{0}: Cannot nest FUNCTION or SUB declarations nor definitions")]
    CannotNestUserCallables(LineCol),

    /// A label with the given name was defined more than once.
    #[error("{0}: Duplicate label {1}")]
    DuplicateLabel(LineCol, String),

    /// A reference carries a type annotation that is incompatible with the symbol it names.
    #[error("{0}: Incompatible type annotation in {1} reference")]
    IncompatibleTypeAnnotationInReference(LineCol, VarRef),

    /// An assignment mixes incompatible types.
    ///
    /// The fields are the type of the value followed by the type of the target variable.
    #[error("{0}: Cannot assign value of type {1} to variable of type {2}")]
    IncompatibleTypesInAssignment(LineCol, ExprType, ExprType),

    /// The exit code given to `END` was rejected; the string holds the reason.
    #[error("{0}: {1}")]
    InvalidEndCode(LineCol, String),

    /// Reading the source code failed with an I/O error.
    #[error("{0}: I/O error during compilation: {1}")]
    Io(LineCol, io::Error),

    /// An `EXIT` statement appeared outside of the kind of block it refers to.
    #[error("{0}: EXIT {1} outside of {1}")]
    MisplacedExit(LineCol, &'static str),

    /// Something that is neither an array nor a function was used with call syntax.
    #[error("{0}: {1} is not an array nor a function")]
    NotAFunction(LineCol, VarRef),

    /// Subscripts were applied to something that is not an array.
    #[error("{0}: {1} is not an array")]
    NotAnArray(LineCol, VarRef),

    /// A numeric type was required but the given type was found instead.
    #[error("{0}: {1} is not a number")]
    NotANumber(LineCol, ExprType),

    /// No more entries fit in the constants pool.
    #[error("{0}: Out of constants")]
    OutOfConstants(LineCol),

    /// No more registers are available in the given scope.
    #[error("{0}: Out of {1} registers")]
    OutOfRegisters(LineCol, RegisterScope),

    /// No more entries fit in the upcalls table.
    #[error("{0}: Out of upcalls")]
    OutOfUpcalls(LineCol),

    /// The parser rejected the source code; the string holds its message.
    #[error("{0}: {1}")]
    Parse(LineCol, String),

    /// The target of a jump or call cannot be reached because it is too far away.
    #[error("{0}: Jump/call target is {1} which is too far")]
    TargetTooFar(LineCol, usize),

    /// An array was declared with an unsupported number of dimensions.
    #[error("{0}: Array cannot have {1} dimensions")]
    TooManyArrayDimensions(LineCol, usize),

    /// A specific type was required but a different one was found.
    ///
    /// The fields are the type that was found followed by the type that was expected.
    #[error("{0}: Expected {2} but found {1}")]
    TypeMismatch(LineCol, ExprType, ExprType),

    /// A symbol was referenced without having been defined.
    #[error("{0}: Undefined symbol {1}")]
    UndefinedSymbol(LineCol, VarRef),

    /// A label was referenced without having been defined.
    #[error("{0}: Unknown label {1}")]
    UnknownLabel(LineCol, String),

    /// An array was accessed with the wrong number of subscripts.
    ///
    /// The fields are the number of subscripts required followed by the number given.
    #[error("{0}: Array requires {1} subscripts but got {2}")]
    WrongNumberOfSubscripts(LineCol, usize, usize),
}

impl Error {
    /// Splits this error into its source position and its message without the position.
    ///
    /// TODO(jmmv): This is a hack to support the current needs of std and allow migrating its
    /// code to this new implementation.  Once migrated, revisit this.
    ///
    /// # Panics
    ///
    /// Panics if the rendered error does not contain the two colons that delimit the position.
    fn split_display_message(&self) -> (LineCol, String) {
        // The rendered text is expected to look like `<line>:<col>: <message>`, so the message
        // is whatever follows the second colon.  Limiting the split to three parts keeps any
        // further colons as part of the message itself.
        let display = self.to_string();
        let message = display
            .splitn(3, ':')
            .nth(2)
            .expect("Error display always has a position followed by a message")
            .trim_start()
            .to_owned();
        (self.pos(), message)
    }

    /// Returns the source position where this compilation error happened.
    ///
    /// The position is read straight from the variant's first field, so this neither formats
    /// the error nor can fail.
    pub fn pos(&self) -> LineCol {
        // Listing every variant (instead of using a catch-all) makes the compiler flag this
        // match whenever a new variant is added.
        let pos = match self {
            Self::AlreadyDefined(pos, ..)
            | Self::ArrayUsedAsScalar(pos, ..)
            | Self::BinaryOpType(pos, ..)
            | Self::CallableSyntax(pos, ..)
            | Self::CannotNestUserCallables(pos)
            | Self::DuplicateLabel(pos, ..)
            | Self::IncompatibleTypeAnnotationInReference(pos, ..)
            | Self::IncompatibleTypesInAssignment(pos, ..)
            | Self::InvalidEndCode(pos, ..)
            | Self::Io(pos, ..)
            | Self::MisplacedExit(pos, ..)
            | Self::NotAFunction(pos, ..)
            | Self::NotAnArray(pos, ..)
            | Self::NotANumber(pos, ..)
            | Self::OutOfConstants(pos)
            | Self::OutOfRegisters(pos, ..)
            | Self::OutOfUpcalls(pos)
            | Self::Parse(pos, ..)
            | Self::TargetTooFar(pos, ..)
            | Self::TooManyArrayDimensions(pos, ..)
            | Self::TypeMismatch(pos, ..)
            | Self::UndefinedSymbol(pos, ..)
            | Self::UnknownLabel(pos, ..)
            | Self::WrongNumberOfSubscripts(pos, ..) => pos,
        };
        // Rebuilt field by field so that this does not depend on `LineCol` being `Copy`.
        LineCol { line: pos.line, col: pos.col }
    }

    /// Returns this error's message without the source position prefix.
    pub fn message_without_pos(&self) -> String {
        self.split_display_message().1
    }

    /// Annotates an invalid `END` exit code error with a source position.
    fn from_bytecode_invalid_exit_code(value: InvalidExitCodeError, pos: LineCol) -> Self {
        Self::InvalidEndCode(pos, value.to_string())
    }

    /// Annotates an error from the symbol table with the position it arose from.
    ///
    /// The scope carried by `syms::Error::UndefinedSymbol` is dropped because the resulting
    /// `Error::UndefinedSymbol` has no field to hold it.
    fn from_syms(value: syms::Error, pos: LineCol) -> Self {
        match value {
            syms::Error::AlreadyDefined(vref) => Error::AlreadyDefined(pos, vref),
            syms::Error::IncompatibleTypeAnnotationInReference(vref) => {
                Error::IncompatibleTypeAnnotationInReference(pos, vref)
            }
            syms::Error::OutOfRegisters(scope) => Error::OutOfRegisters(pos, scope),
            syms::Error::UndefinedSymbol(vref, _scope) => Error::UndefinedSymbol(pos, vref),
        }
    }
}

impl From<parser::Error> for Error {
    fn from(value: parser::Error) -> Self {
        match value {
            parser::Error::Bad(pos, message) => Self::Parse(pos, message),
            parser::Error::Io(pos, e) => Self::Io(pos, e),
        }
    }
}

/// Result type for compilation operations.
///
/// This is [`std::result::Result`] with the error type fixed to this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// Compiler context.
///
/// This exists to support incremental compilation by keeping state and appending code to the
/// image being built, which is useful in REPL scenarios.
pub struct Compiler {
    /// Compilation context carried from one chunk to the next.  `compile_more` works on a clone
    /// and stores the result back here only when compilation succeeds.
    context: Context,
    /// Global symbol table, created from the upcalls metadata and then passed through
    /// `prepare_globals`.  Updated by `compile_more` only when compilation succeeds.
    symtable: GlobalSymtable,
    /// Snapshot of the program-level local symbol table.  Restored at the start of every
    /// `compile_more` call and replaced by the new snapshot when compilation succeeds.
    program_scope: LocalSymtableSnapshot,
}

impl Compiler {
    /// Builds a compiler that has not compiled any code yet.
    ///
    /// `upcalls` maps symbol keys to the built-in callables that the compiled code can use.
    /// Only the metadata of each callable is extracted and handed to the symbol table.
    ///
    /// `global_defs` describes the pre-defined global variables visible to the compiled program.
    ///
    /// # Errors
    ///
    /// Returns whatever error `prepare_globals` reports while processing `global_defs`.
    pub fn new(
        upcalls: &HashMap<SymbolKey, Rc<dyn Callable>>,
        global_defs: &[GlobalDef],
    ) -> Result<Self> {
        let upcalls_metadata: HashMap<_, _> =
            upcalls.iter().map(|(key, callable)| (key.clone(), callable.metadata())).collect();

        let mut context = Context::default();

        let mut symtable = GlobalSymtable::new(upcalls_metadata);
        prepare_globals(&mut context, &mut symtable, global_defs)?;

        Ok(Self { context, symtable, program_scope: LocalSymtableSnapshot::default() })
    }

    /// Compiles `input` as a complete program and returns the resulting image.
    ///
    /// This consumes the compiler; use `compile_more` to compile incrementally instead.
    pub fn compile(mut self, input: &mut dyn io::Read) -> Result<Image> {
        let mut image = Image::default();
        self.compile_more(&mut image, input).map(|()| image)
    }

    /// Compiles `input` as the next chunk of a program and appends the result to `image`.
    ///
    /// # Errors
    ///
    /// If compilation fails, the error is returned and the state kept in this compiler is left
    /// exactly as it was before the call.
    pub fn compile_more(&mut self, image: &mut Image, input: &mut dyn io::Read) -> Result<()> {
        // Work on copies of the persistent state so that a failed compilation can be discarded
        // by simply returning early.
        let mut scratch_context = self.context.clone();
        let mut scratch_symtable = self.symtable.clone();
        let scope = LocalSymtable::restore(&mut scratch_symtable, self.program_scope.clone());

        let (delta, snapshot) = top::compile(input, image, &mut scratch_context, scope)?;

        // Compilation succeeded: publish the new code and commit the updated state.
        image.append(delta);
        self.context = scratch_context;
        self.symtable = scratch_symtable;
        self.program_scope = snapshot;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A plain message is separated cleanly from the position that prefixes it.
    #[test]
    fn test_error_pos_and_message_without_pos() {
        let message = String::from("Invalid token");
        let err = Error::Parse(LineCol { line: 3, col: 15 }, message);

        let expected_pos = LineCol { line: 3, col: 15 };
        assert_eq!(expected_pos, err.pos());
        assert_eq!("Invalid token", err.message_without_pos());
    }

    /// Colons inside the message itself are not mistaken for position separators.
    #[test]
    fn test_error_message_without_pos_preserves_colons() {
        let message = String::from("Expected INTEGER: got STRING");
        let err = Error::Parse(LineCol { line: 4, col: 9 }, message);

        let expected_pos = LineCol { line: 4, col: 9 };
        assert_eq!(expected_pos, err.pos());
        assert_eq!("Expected INTEGER: got STRING", err.message_without_pos());
    }
}