use crate::bf::Op;
use std::fmt::Write;
/// Settings that control the LLVM IR module produced by `generate_module`.
#[derive(Debug, Clone)]
pub struct LlvmOptions {
/// Number of `i8` cells in the emitted `@tape` global array.
/// `generate_module` rejects a value of zero.
pub tape_size: usize,
/// When `Some`, written (escaped) as the module's `target triple = "..."` line.
pub target_triple: Option<String>,
/// When `Some`, written (escaped) as the module's `source_filename = "..."` line.
pub source_filename: Option<String>,
}
/// Errors reported by `generate_module` before any IR is produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodegenError {
    /// The requested tape size is unusable; carries the rejected size.
    InvalidTapeSize(usize),
}

impl std::fmt::Display for CodegenError {
    /// Renders a human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum has a single variant, so the pattern is irrefutable; adding
        // a variant later turns this into a compile error rather than a
        // silently missing message.
        let Self::InvalidTapeSize(size) = self;
        f.write_fmt(format_args!(
            "tape size must be greater than zero, got {size}"
        ))
    }
}

// No underlying cause to expose, so the default `Error` methods suffice.
impl std::error::Error for CodegenError {}
/// Lowers `ops` to a textual LLVM IR module.
///
/// The module consists of a preamble (header comment, optional
/// `source_filename` / `target triple` lines, the `@tape` global and the
/// `putchar` / `getchar` declarations) followed by a `main` function that
/// executes the operations.
///
/// # Errors
///
/// Returns [`CodegenError::InvalidTapeSize`] when `options.tape_size` is zero.
pub fn generate_module(ops: &[Op], options: &LlvmOptions) -> Result<String, CodegenError> {
    match options.tape_size {
        rejected @ 0 => Err(CodegenError::InvalidTapeSize(rejected)),
        _ => {
            let mut emitter = Emitter::new(options);
            emitter.emit_preamble();
            emitter.emit_main(ops);
            Ok(emitter.output)
        }
    }
}
/// Accumulates the text of one LLVM IR module while operations are lowered.
struct Emitter<'a> {
/// IR text emitted so far; handed back to the caller of `generate_module`.
output: String,
/// Cell count used in the `[N x i8]` type of `@tape` and of every
/// `getelementptr` into it.
tape_size: usize,
/// Optional target triple, borrowed from the `LlvmOptions`.
target_triple: Option<&'a str>,
/// Optional source file name, borrowed from the `LlvmOptions`.
source_filename: Option<&'a str>,
/// Number that the next `%N` temporary returned by `temp` will carry.
temp_index: usize,
/// Suffix that the next label returned by `fresh_label` will carry.
label_index: usize,
}
impl<'a> Emitter<'a> {
fn new(options: &'a LlvmOptions) -> Self {
Self {
output: String::new(),
tape_size: options.tape_size,
target_triple: options.target_triple.as_deref(),
source_filename: options.source_filename.as_deref(),
temp_index: 0,
label_index: 0,
}
}
fn emit_preamble(&mut self) {
self.line("; Generated by hypothalamus.");
if let Some(source_filename) = self.source_filename {
self.line(&format!(
"source_filename = \"{}\"",
escape_llvm_string(source_filename)
));
}
if let Some(target_triple) = self.target_triple {
self.line(&format!(
"target triple = \"{}\"",
escape_llvm_string(target_triple)
));
}
self.blank_line();
self.line(&format!(
"@tape = internal global [{} x i8] zeroinitializer, align 16",
self.tape_size
));
self.blank_line();
self.line("declare i32 @putchar(i32)");
self.line("declare i32 @getchar()");
self.blank_line();
}
fn emit_main(&mut self, ops: &[Op]) {
self.line("define i32 @main() {");
self.label("entry");
self.line(" %ptr = alloca i64, align 8");
self.line(" store i64 0, ptr %ptr, align 8");
self.emit_ops(ops);
self.line(" ret i32 0");
self.line("}");
}
fn emit_ops(&mut self, ops: &[Op]) {
for op in ops {
match op {
Op::Add(delta) => self.emit_add(*delta),
Op::Move(delta) => self.emit_move(*delta),
Op::Input => self.emit_input(),
Op::Output => self.emit_output(),
Op::Loop(body) => self.emit_loop(body),
Op::Clear => self.emit_clear(),
}
}
}
fn emit_add(&mut self, delta: i32) {
let delta = delta.rem_euclid(256);
if delta == 0 {
return;
}
let cell_ptr = self.emit_cell_ptr();
let current = self.temp();
self.line(&format!(" {current} = load i8, ptr {cell_ptr}, align 1"));
let next = self.temp();
self.line(&format!(" {next} = add i8 {current}, {delta}"));
self.line(&format!(" store i8 {next}, ptr {cell_ptr}, align 1"));
}
fn emit_move(&mut self, delta: i64) {
if delta == 0 {
return;
}
let current = self.temp();
self.line(&format!(" {current} = load i64, ptr %ptr, align 8"));
let next = self.temp();
self.line(&format!(" {next} = add i64 {current}, {delta}"));
self.line(&format!(" store i64 {next}, ptr %ptr, align 8"));
}
fn emit_input(&mut self) {
let byte = self.temp();
self.line(&format!(" {byte} = call i32 @getchar()"));
let is_eof = self.temp();
self.line(&format!(" {is_eof} = icmp eq i32 {byte}, -1"));
let store_label = self.fresh_label("input_store");
let cont_label = self.fresh_label("input_cont");
self.line(&format!(
" br i1 {is_eof}, label %{cont_label}, label %{store_label}"
));
self.label(&store_label);
let truncated = self.temp();
self.line(&format!(" {truncated} = trunc i32 {byte} to i8"));
let cell_ptr = self.emit_cell_ptr();
self.line(&format!(" store i8 {truncated}, ptr {cell_ptr}, align 1"));
self.line(&format!(" br label %{cont_label}"));
self.label(&cont_label);
}
fn emit_output(&mut self) {
let cell_ptr = self.emit_cell_ptr();
let byte = self.temp();
self.line(&format!(" {byte} = load i8, ptr {cell_ptr}, align 1"));
let widened = self.temp();
self.line(&format!(" {widened} = zext i8 {byte} to i32"));
let result = self.temp();
self.line(&format!(" {result} = call i32 @putchar(i32 {widened})"));
}
fn emit_loop(&mut self, body: &[Op]) {
let check_label = self.fresh_label("loop_check");
let body_label = self.fresh_label("loop_body");
let end_label = self.fresh_label("loop_end");
self.line(&format!(" br label %{check_label}"));
self.label(&check_label);
let cell_ptr = self.emit_cell_ptr();
let byte = self.temp();
self.line(&format!(" {byte} = load i8, ptr {cell_ptr}, align 1"));
let is_zero = self.temp();
self.line(&format!(" {is_zero} = icmp eq i8 {byte}, 0"));
self.line(&format!(
" br i1 {is_zero}, label %{end_label}, label %{body_label}"
));
self.label(&body_label);
self.emit_ops(body);
self.line(&format!(" br label %{check_label}"));
self.label(&end_label);
}
fn emit_clear(&mut self) {
let cell_ptr = self.emit_cell_ptr();
self.line(&format!(" store i8 0, ptr {cell_ptr}, align 1"));
}
fn emit_cell_ptr(&mut self) -> String {
let pointer = self.temp();
self.line(&format!(" {pointer} = load i64, ptr %ptr, align 8"));
let cell_ptr = self.temp();
self.line(&format!(
" {cell_ptr} = getelementptr [{} x i8], ptr @tape, i64 0, i64 {pointer}",
self.tape_size
));
cell_ptr
}
fn temp(&mut self) -> String {
let temp = format!("%{}", self.temp_index);
self.temp_index += 1;
temp
}
fn fresh_label(&mut self, prefix: &str) -> String {
let label = format!("{prefix}_{}", self.label_index);
self.label_index += 1;
label
}
fn label(&mut self, label: &str) {
self.line(&format!("{label}:"));
}
fn line(&mut self, line: &str) {
self.output.push_str(line);
self.output.push('\n');
}
fn blank_line(&mut self) {
self.output.push('\n');
}
}
/// Escapes `value` for use inside a double-quoted LLVM IR string.
///
/// Printable ASCII bytes (0x20..=0x7E) other than `"` and `\` are copied
/// verbatim; every other byte, including each byte of a multi-byte UTF-8
/// sequence, becomes a backslash followed by two uppercase hex digits.
fn escape_llvm_string(value: &str) -> String {
    let mut out = String::new();
    for byte in value.bytes() {
        let is_literal = matches!(byte, 0x20..=0x7e) && byte != b'"' && byte != b'\\';
        if is_literal {
            out.push(char::from(byte));
        } else {
            // `"` (0x22) and `\` (0x5C) take this path as well and come out
            // as `\22` and `\5C`.
            write!(out, "\\{byte:02X}").expect("write to string");
        }
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DEFAULT_TAPE_SIZE;

    /// Baseline options shared by the tests: default tape, Linux x86-64
    /// triple, and a fixed source file name.
    fn options() -> LlvmOptions {
        let target_triple = Some(String::from("x86_64-unknown-linux-gnu"));
        let source_filename = Some(String::from("test.b"));
        LlvmOptions {
            tape_size: DEFAULT_TAPE_SIZE,
            target_triple,
            source_filename,
        }
    }

    /// Asserts that every fragment in `needles` occurs somewhere in `ir`.
    fn assert_contains_all(ir: &str, needles: &[&str]) {
        for &needle in needles {
            assert!(ir.contains(needle), "generated IR is missing {needle:?}");
        }
    }

    #[test]
    fn emits_module_header_and_main() {
        let ir = generate_module(&[], &options()).expect("codegen");
        assert_contains_all(
            &ir,
            &[
                "target triple = \"x86_64-unknown-linux-gnu\"",
                "@tape = internal global [30000 x i8] zeroinitializer",
                "define i32 @main()",
                "ret i32 0",
            ],
        );
    }

    #[test]
    fn input_leaves_cell_unchanged_on_eof() {
        let ir = generate_module(&[Op::Input], &options()).expect("codegen");
        assert_contains_all(
            &ir,
            &[
                "call i32 @getchar()",
                "icmp eq i32",
                "input_store_",
                "input_cont_",
            ],
        );
    }

    #[test]
    fn output_names_putchar_result() {
        let ir = generate_module(&[Op::Output], &options()).expect("codegen");
        assert_contains_all(&ir, &[" = call i32 @putchar(i32 "]);
    }

    #[test]
    fn emits_loop_blocks() {
        let program = [Op::Loop(vec![Op::Move(1)])];
        let ir = generate_module(&program, &options()).expect("codegen");
        assert_contains_all(&ir, &["loop_check_", "loop_body_", "loop_end_"]);
    }

    #[test]
    fn rejects_empty_tape() {
        let zero_tape = LlvmOptions {
            tape_size: 0,
            ..options()
        };
        let err = generate_module(&[], &zero_tape).expect_err("invalid tape");
        assert_eq!(err, CodegenError::InvalidTapeSize(0));
    }
}