// hypothalamus 0.1.0
//
// A Brainfuck AOT compiler with an LLVM IR backend.
// Documentation
use crate::bf::Op;
use std::fmt::Write;

/// Settings that control the LLVM IR module produced by `generate_module`.
#[derive(Debug, Clone)]
pub struct LlvmOptions {
    /// Number of `i8` cells in the emitted `@tape` global.
    ///
    /// `generate_module` rejects a value of zero.
    pub tape_size: usize,
    /// When `Some`, emitted (escaped) as the module's `target triple` line;
    /// when `None`, no such line is written.
    pub target_triple: Option<String>,
    /// When `Some`, emitted (escaped) as the module's `source_filename` line;
    /// when `None`, no such line is written.
    pub source_filename: Option<String>,
}

/// Errors reported by `generate_module`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodegenError {
    /// The requested tape size was rejected; the payload is the offending
    /// value (`generate_module` produces this for a size of zero).
    InvalidTapeSize(usize),
}

impl std::fmt::Display for CodegenError {
    /// Writes the human-readable description of the error to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable while the enum has exactly one variant; adding another
        // variant turns this into a compile error, so no case can be missed.
        let Self::InvalidTapeSize(size) = self;
        write!(f, "tape size must be greater than zero, got {size}")
    }
}

// Empty impl: only the trait's provided methods are used.
impl std::error::Error for CodegenError {}

/// Compiles `ops` into the text of an LLVM IR module.
///
/// The preamble is emitted first, followed by the `main` function that
/// executes `ops`.
///
/// # Errors
///
/// Returns [`CodegenError::InvalidTapeSize`] when `options.tape_size` is zero.
pub fn generate_module(ops: &[Op], options: &LlvmOptions) -> Result<String, CodegenError> {
    match options.tape_size {
        // A zero-length tape leaves no cell to address, so refuse up front.
        0 => Err(CodegenError::InvalidTapeSize(0)),
        _ => {
            let mut emitter = Emitter::new(options);
            emitter.emit_preamble();
            emitter.emit_main(ops);
            Ok(emitter.output)
        }
    }
}

/// Accumulates the textual LLVM IR for one module.
///
/// The optional strings are borrowed from the `LlvmOptions` the emitter was
/// created from.
struct Emitter<'a> {
    /// IR text emitted so far.
    output: String,
    /// Length of the `@tape` array, in `i8` cells.
    tape_size: usize,
    /// Target triple for the module header, if one was supplied.
    target_triple: Option<&'a str>,
    /// Source file name for the module header, if one was supplied.
    source_filename: Option<&'a str>,
    /// Number used for the next `%N` temporary.
    temp_index: usize,
    /// Numeric suffix used for the next generated block label.
    label_index: usize,
}

impl<'a> Emitter<'a> {
    /// Creates an emitter with an empty buffer and both counters at zero,
    /// borrowing the optional header strings from `options`.
    fn new(options: &'a LlvmOptions) -> Self {
        let tape_size = options.tape_size;
        let target_triple = options.target_triple.as_deref();
        let source_filename = options.source_filename.as_deref();

        Self {
            output: String::new(),
            tape_size,
            target_triple,
            source_filename,
            temp_index: 0,
            label_index: 0,
        }
    }

    /// Emits everything that precedes `main`: the header comment, the optional
    /// `source_filename` / `target triple` lines, the tape global and the
    /// `putchar` / `getchar` declarations.
    fn emit_preamble(&mut self) {
        self.line("; Generated by hypothalamus.");

        if let Some(name) = self.source_filename {
            let escaped = escape_llvm_string(name);
            self.line(&format!("source_filename = \"{escaped}\""));
        }

        if let Some(triple) = self.target_triple {
            let escaped = escape_llvm_string(triple);
            self.line(&format!("target triple = \"{escaped}\""));
        }

        self.blank_line();
        let cells = self.tape_size;
        self.line(&format!(
            "@tape = internal global [{cells} x i8] zeroinitializer, align 16"
        ));
        self.blank_line();
        for declaration in ["declare i32 @putchar(i32)", "declare i32 @getchar()"] {
            self.line(declaration);
        }
        self.blank_line();
    }

    /// Emits `main`: a stack slot `%ptr` holding the cell index (initially 0),
    /// the code for `ops`, and a final `ret i32 0`.
    fn emit_main(&mut self, ops: &[Op]) {
        self.line("define i32 @main() {");
        self.label("entry");
        self.line("  %ptr = alloca i64, align 8");
        self.line("  store i64 0, ptr %ptr, align 8");
        self.emit_ops(ops);
        self.line("  ret i32 0");
        self.line("}");
    }

    /// Emits code for each operation in order, dispatching on its variant.
    fn emit_ops(&mut self, ops: &[Op]) {
        ops.iter().for_each(|op| match op {
            Op::Add(delta) => self.emit_add(*delta),
            Op::Move(delta) => self.emit_move(*delta),
            Op::Input => self.emit_input(),
            Op::Output => self.emit_output(),
            Op::Loop(body) => self.emit_loop(body),
            Op::Clear => self.emit_clear(),
        });
    }

    /// Adds `delta` (reduced modulo 256) to the current cell; emits nothing
    /// when the reduced amount is zero.
    fn emit_add(&mut self, delta: i32) {
        let amount = delta.rem_euclid(256);
        if amount != 0 {
            let (cell, old) = self.emit_cell_load();
            let new = self.temp();
            self.line(&format!("  {new} = add i8 {old}, {amount}"));
            self.line(&format!("  store i8 {new}, ptr {cell}, align 1"));
        }
    }

    /// Adds `delta` to the cell index stored in `%ptr`; emits nothing for a
    /// zero delta. No bounds check or wrap-around is generated.
    fn emit_move(&mut self, delta: i64) {
        if delta != 0 {
            // Both names are reserved up front; numbering follows emission order.
            let before = self.temp();
            let after = self.temp();
            self.line(&format!("  {before} = load i64, ptr %ptr, align 8"));
            self.line(&format!("  {after} = add i64 {before}, {delta}"));
            self.line(&format!("  store i64 {after}, ptr %ptr, align 8"));
        }
    }

    /// Reads one byte with `getchar` and stores it in the current cell. A
    /// result of -1 (EOF) branches past the store, leaving the cell as it was.
    fn emit_input(&mut self) {
        let read = self.temp();
        let at_eof = self.temp();
        let store_block = self.fresh_label("input_store");
        let done_block = self.fresh_label("input_cont");

        self.line(&format!("  {read} = call i32 @getchar()"));
        self.line(&format!("  {at_eof} = icmp eq i32 {read}, -1"));
        self.line(&format!(
            "  br i1 {at_eof}, label %{done_block}, label %{store_block}"
        ));

        self.label(&store_block);
        let narrowed = self.temp();
        self.line(&format!("  {narrowed} = trunc i32 {read} to i8"));
        let cell = self.emit_cell_ptr();
        self.line(&format!("  store i8 {narrowed}, ptr {cell}, align 1"));
        self.line(&format!("  br label %{done_block}"));

        self.label(&done_block);
    }

    /// Passes the current cell, zero-extended to `i32`, to `putchar`.
    fn emit_output(&mut self) {
        let (_, byte) = self.emit_cell_load();
        let wide = self.temp();
        // The call result is given a name even though it is never read.
        let ignored = self.temp();
        self.line(&format!("  {wide} = zext i8 {byte} to i32"));
        self.line(&format!("  {ignored} = call i32 @putchar(i32 {wide})"));
    }

    /// Emits a loop that runs `body` while the current cell is non-zero, using
    /// three fresh blocks: condition check, body and exit.
    fn emit_loop(&mut self, body: &[Op]) {
        let [check, body_block, exit] =
            ["loop_check", "loop_body", "loop_end"].map(|prefix| self.fresh_label(prefix));

        self.line(&format!("  br label %{check}"));

        self.label(&check);
        let (_, byte) = self.emit_cell_load();
        let is_zero = self.temp();
        self.line(&format!("  {is_zero} = icmp eq i8 {byte}, 0"));
        self.line(&format!(
            "  br i1 {is_zero}, label %{exit}, label %{body_block}"
        ));

        self.label(&body_block);
        self.emit_ops(body);
        self.line(&format!("  br label %{check}"));

        self.label(&exit);
    }

    /// Stores zero into the current cell.
    fn emit_clear(&mut self) {
        let cell = self.emit_cell_ptr();
        self.line(&format!("  store i8 0, ptr {cell}, align 1"));
    }

    /// Emits the address computation for the current cell followed by a load
    /// of its byte; returns the `(cell address, loaded byte)` value names.
    fn emit_cell_load(&mut self) -> (String, String) {
        let cell = self.emit_cell_ptr();
        let byte = self.temp();
        self.line(&format!("  {byte} = load i8, ptr {cell}, align 1"));
        (cell, byte)
    }

    /// Loads the cell index from `%ptr` and computes the address of that cell
    /// inside `@tape`; returns the name of the address value.
    fn emit_cell_ptr(&mut self) -> String {
        let index = self.temp();
        let address = self.temp();
        let cells = self.tape_size;
        self.line(&format!("  {index} = load i64, ptr %ptr, align 8"));
        self.line(&format!(
            "  {address} = getelementptr [{cells} x i8], ptr @tape, i64 0, i64 {index}"
        ));
        address
    }

    /// Returns the next numbered temporary (`%0`, `%1`, ...).
    fn temp(&mut self) -> String {
        let id = self.temp_index;
        self.temp_index = id + 1;
        format!("%{id}")
    }

    /// Returns a label of the form `<prefix>_<n>`, where `n` comes from a
    /// counter shared by all prefixes.
    fn fresh_label(&mut self, prefix: &str) -> String {
        let id = self.label_index;
        self.label_index = id + 1;
        format!("{prefix}_{id}")
    }

    /// Starts a new basic block by writing `<label>:` on its own line.
    fn label(&mut self, label: &str) {
        self.output.push_str(label);
        self.output.push_str(":\n");
    }

    /// Appends `line` and a trailing newline to the output.
    fn line(&mut self, line: &str) {
        self.output += line;
        self.output += "\n";
    }

    /// Appends an empty line to the output.
    fn blank_line(&mut self) {
        self.line("");
    }
}

/// Escapes `value` for use inside a double-quoted LLVM IR string.
///
/// Printable ASCII bytes (`0x20..=0x7e`) other than `"` and `\` are copied
/// unchanged; every other byte is written as a backslash followed by two
/// uppercase hexadecimal digits (so `"` becomes `\22` and `\` becomes `\5C`).
/// Escaping is per byte, so a multi-byte UTF-8 character yields one escape
/// per byte.
fn escape_llvm_string(value: &str) -> String {
    let mut out = String::with_capacity(value.len());

    for &byte in value.as_bytes() {
        let must_escape = matches!(byte, b'"' | b'\\') || !(0x20..=0x7e).contains(&byte);
        if must_escape {
            write!(out, "\\{byte:02X}").expect("write to string");
        } else {
            out.push(char::from(byte));
        }
    }

    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::DEFAULT_TAPE_SIZE;

    /// Options shared by the tests: default tape size, a fixed target triple
    /// and a fixed source file name.
    fn options() -> LlvmOptions {
        LlvmOptions {
            tape_size: DEFAULT_TAPE_SIZE,
            target_triple: Some(String::from("x86_64-unknown-linux-gnu")),
            source_filename: Some(String::from("test.b")),
        }
    }

    /// Generates IR for `ops` with the shared options, panicking on failure.
    fn ir_for(ops: &[Op]) -> String {
        generate_module(ops, &options()).expect("codegen")
    }

    /// Asserts that every fragment occurs somewhere in `ir`.
    fn assert_contains_all(ir: &str, fragments: &[&str]) {
        for &fragment in fragments {
            assert!(ir.contains(fragment), "IR is missing {fragment:?}");
        }
    }

    #[test]
    fn emits_module_header_and_main() {
        assert_contains_all(
            &ir_for(&[]),
            &[
                "target triple = \"x86_64-unknown-linux-gnu\"",
                "@tape = internal global [30000 x i8] zeroinitializer",
                "define i32 @main()",
                "ret i32 0",
            ],
        );
    }

    #[test]
    fn input_leaves_cell_unchanged_on_eof() {
        assert_contains_all(
            &ir_for(&[Op::Input]),
            &[
                "call i32 @getchar()",
                "icmp eq i32",
                "input_store_",
                "input_cont_",
            ],
        );
    }

    #[test]
    fn output_names_putchar_result() {
        assert_contains_all(&ir_for(&[Op::Output]), &[" = call i32 @putchar(i32 "]);
    }

    #[test]
    fn emits_loop_blocks() {
        let program = [Op::Loop(vec![Op::Move(1)])];
        assert_contains_all(
            &ir_for(&program),
            &["loop_check_", "loop_body_", "loop_end_"],
        );
    }

    #[test]
    fn rejects_empty_tape() {
        let invalid = LlvmOptions {
            tape_size: 0,
            ..options()
        };
        let error = generate_module(&[], &invalid).expect_err("invalid tape");
        assert_eq!(error, CodegenError::InvalidTapeSize(0));
    }
}