hypothalamus 0.1.1

A Brainfuck AOT compiler with an LLVM IR backend
Documentation
use hypothalamus::DEFAULT_TAPE_SIZE;
use hypothalamus::bf;
use hypothalamus::llvm::{self, LlvmOptions};
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, ExitCode};
use std::time::{SystemTime, UNIX_EPOCH};

/// Kind of artifact the compiler is asked to produce (selected with `--emit`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EmitKind {
    /// Linked executable; used when `--emit` is not given.
    Executable,
    /// Object file (clang is invoked with `-c`).
    Object,
    /// Assembly listing (clang is invoked with `-S`).
    Assembly,
    /// Textual LLVM IR, written out directly without invoking clang.
    LlvmIr,
}

impl EmitKind {
    /// Maps a user-supplied `--emit` value onto its [`EmitKind`].
    ///
    /// Matching is exact and case-sensitive; any spelling that is not listed
    /// in the table below yields `None`.
    fn parse(value: &str) -> Option<Self> {
        // Every accepted spelling paired with the kind it selects.
        const SPELLINGS: [(&str, EmitKind); 8] = [
            ("exe", EmitKind::Executable),
            ("executable", EmitKind::Executable),
            ("obj", EmitKind::Object),
            ("object", EmitKind::Object),
            ("asm", EmitKind::Assembly),
            ("assembly", EmitKind::Assembly),
            ("llvm-ir", EmitKind::LlvmIr),
            ("ll", EmitKind::LlvmIr),
        ];

        SPELLINGS
            .iter()
            .find(|(spelling, _)| *spelling == value)
            .map(|&(_, kind)| kind)
    }
}

/// Settings for a single compilation, assembled by `parse_args`.
#[derive(Debug)]
struct Config {
    /// Path of the source file that `compile` reads.
    input: PathBuf,
    /// Explicit output path; when `None`, `default_output_path` derives one from `input`.
    output: Option<PathBuf>,
    /// Kind of artifact to produce.
    emit: EmitKind,
    /// Target triple forwarded to clang as `--target=<triple>` and handed to the LLVM generator.
    target_triple: Option<String>,
    /// Tape size handed to the LLVM generator (`--tape-size`).
    tape_size: usize,
    /// Program name or path of the clang-compatible driver to run (`--cc`).
    clang: String,
    /// When set, the intermediate `.ll` file is written next to the output and is not deleted.
    keep_ll: bool,
}

/// What the command line asked the program to do, as decided by `parse_args`.
#[derive(Debug)]
enum Action {
    /// Compile using the given configuration.
    Run(Config),
    /// Print the usage text (`-h` / `--help`).
    Help,
    /// Print the package version (`--version`).
    Version,
}

fn main() -> ExitCode {
    match run() {
        Ok(()) => ExitCode::SUCCESS,
        Err(Error::Usage(message)) => {
            eprintln!("hypothalamus: {message}");
            eprintln!("run `hypothalamus --help` for usage");
            ExitCode::from(2)
        }
        Err(Error::Failure(message)) => {
            eprintln!("hypothalamus: {message}");
            ExitCode::FAILURE
        }
    }
}

/// Parses the process arguments (skipping the program name) and performs the
/// requested action: print help, print the version, or compile.
///
/// # Errors
///
/// Propagates whatever `parse_args` or `compile` report.
fn run() -> Result<(), Error> {
    let action = parse_args(env::args().skip(1))?;

    match action {
        Action::Run(config) => return compile(config),
        Action::Help => print_help(),
        Action::Version => println!("hypothalamus {}", env!("CARGO_PKG_VERSION")),
    }

    Ok(())
}

fn compile(config: Config) -> Result<(), Error> {
    let source = fs::read(&config.input).map_err(|err| {
        Error::Failure(format!("failed to read {}: {err}", config.input.display()))
    })?;
    let ops = bf::parse(&source).map_err(|err| Error::Failure(format!("syntax error: {err}")))?;

    let module = llvm::generate_module(
        &ops,
        &LlvmOptions {
            tape_size: config.tape_size,
            target_triple: config.target_triple.clone(),
            source_filename: Some(config.input.display().to_string()),
        },
    )
    .map_err(|err| Error::Failure(format!("LLVM code generation failed: {err}")))?;

    let output = config.output.clone().unwrap_or_else(|| {
        default_output_path(&config.input, config.emit, config.target_triple.as_deref())
    });

    if config.emit == EmitKind::LlvmIr {
        return write_llvm_ir(&output, &module);
    }

    compile_with_clang(&config, &output, &module)
}

fn write_llvm_ir(output: &Path, module: &str) -> Result<(), Error> {
    if output == Path::new("-") {
        print!("{module}");
        return Ok(());
    }

    fs::write(output, module).map_err(|err| {
        Error::Failure(format!(
            "failed to write LLVM IR to {}: {err}",
            output.display()
        ))
    })
}

fn compile_with_clang(config: &Config, output: &Path, module: &str) -> Result<(), Error> {
    let ll_path = if config.keep_ll {
        output.with_extension("ll")
    } else {
        temporary_llvm_path()
    };

    fs::write(&ll_path, module).map_err(|err| {
        Error::Failure(format!(
            "failed to write temporary LLVM IR to {}: {err}",
            ll_path.display()
        ))
    })?;

    let result = invoke_clang(config, output, &ll_path);

    if !config.keep_ll {
        let _ = fs::remove_file(&ll_path);
    }

    result
}

fn invoke_clang(config: &Config, output: &Path, ll_path: &Path) -> Result<(), Error> {
    let mut command = Command::new(&config.clang);
    command.arg("-Wno-override-module");

    if let Some(target_triple) = &config.target_triple {
        command.arg(format!("--target={target_triple}"));
    }

    match config.emit {
        EmitKind::Executable => {}
        EmitKind::Object => {
            command.arg("-c");
        }
        EmitKind::Assembly => {
            command.arg("-S");
        }
        EmitKind::LlvmIr => unreachable!("LLVM IR emission does not invoke clang"),
    }

    command.arg(ll_path);
    command.arg("-o");
    command.arg(output);

    let status = command.status().map_err(|err| {
        Error::Failure(format!(
            "failed to run `{}`. Install clang or pass --cc <path>: {err}",
            config.clang
        ))
    })?;

    if !status.success() {
        return Err(Error::Failure(format!("clang failed with status {status}")));
    }

    Ok(())
}

/// Derives the output path used when no `-o`/`--output` was given.
///
/// The input's final extension is replaced according to `emit`:
///
/// * executable: no extension, or `exe` when `target_triple` contains
///   `windows`;
/// * object: `o`; assembly: `s`; LLVM IR: `ll`.
///
/// Only the last extension is replaced, so `a.b.bf` yields `a.b` / `a.b.exe`.
///
/// If the derived path would be the input path itself (for example an
/// extensionless input compiled to an executable, or `prog.o` compiled with
/// `--emit obj`), the suffix is appended to the full file name instead
/// (`prog` -> `prog.out`, `prog.o` -> `prog.o.o`) so that the default output
/// never overwrites the source file.
fn default_output_path(input: &Path, emit: EmitKind, target_triple: Option<&str>) -> PathBuf {
    let targets_windows = target_triple.map_or(false, |target| target.contains("windows"));

    let extension = match emit {
        EmitKind::Executable if targets_windows => "exe",
        EmitKind::Executable => "",
        EmitKind::Object => "o",
        EmitKind::Assembly => "s",
        EmitKind::LlvmIr => "ll",
    };

    let mut output = input.with_extension(extension);

    if output == input {
        // Extensionless executables historically fall back to `.out`.
        let suffix = if extension.is_empty() { "out" } else { extension };
        // Paths without a final file name (such as `..`) are left untouched.
        if let Some(file_name) = input.file_name() {
            let mut file_name = file_name.to_os_string();
            file_name.push(".");
            file_name.push(suffix);
            output.set_file_name(file_name);
        }
    }

    output
}

/// Builds a path inside the system temporary directory for the intermediate
/// LLVM IR file.
///
/// The file name has the form `hypothalamus-<pid>-<nanos>.ll`, where `<nanos>`
/// is the number of nanoseconds since the Unix epoch (or `0` if the system
/// clock reports a time before the epoch). The file itself is not created.
fn temporary_llvm_path() -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();

    let mut path = env::temp_dir();
    path.push(format!("hypothalamus-{pid}-{nanos}.ll"));
    path
}

fn parse_args(args: impl IntoIterator<Item = String>) -> Result<Action, Error> {
    let mut input = None;
    let mut output = None;
    let mut emit = EmitKind::Executable;
    let mut target_triple = None;
    let mut tape_size = DEFAULT_TAPE_SIZE;
    let mut clang = "clang".to_string();
    let mut keep_ll = false;

    let mut args = args.into_iter();
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "-h" | "--help" => return Ok(Action::Help),
            "--version" => return Ok(Action::Version),
            "-o" | "--output" => {
                output = Some(PathBuf::from(next_value(&mut args, &arg)?));
            }
            "--emit" => {
                emit = parse_emit_value(&next_value(&mut args, &arg)?)?;
            }
            "--target" => {
                target_triple = Some(next_value(&mut args, &arg)?);
            }
            "--tape-size" => {
                let value = next_value(&mut args, &arg)?;
                tape_size = value
                    .parse::<usize>()
                    .map_err(|_| Error::Usage(format!("invalid --tape-size value `{value}`")))?;
            }
            "--cc" => {
                clang = next_value(&mut args, &arg)?;
            }
            "--keep-ll" => {
                keep_ll = true;
            }
            "--" => {
                for value in args {
                    set_input(&mut input, value)?;
                }
                break;
            }
            _ if arg.starts_with("--output=") => {
                output = Some(PathBuf::from(value_after_equals(&arg)));
            }
            _ if arg.starts_with("--emit=") => {
                emit = parse_emit_value(value_after_equals(&arg))?;
            }
            _ if arg.starts_with("--target=") => {
                target_triple = Some(value_after_equals(&arg).to_string());
            }
            _ if arg.starts_with("--tape-size=") => {
                let value = value_after_equals(&arg);
                tape_size = value
                    .parse::<usize>()
                    .map_err(|_| Error::Usage(format!("invalid --tape-size value `{value}`")))?;
            }
            _ if arg.starts_with("--cc=") => {
                clang = value_after_equals(&arg).to_string();
            }
            _ if arg.starts_with('-') && arg != "-" => {
                return Err(Error::Usage(format!("unknown option `{arg}`")));
            }
            _ => set_input(&mut input, arg)?,
        }
    }

    let input = input.ok_or_else(|| Error::Usage("missing input file".to_string()))?;

    Ok(Action::Run(Config {
        input,
        output,
        emit,
        target_triple,
        tape_size,
        clang,
        keep_ll,
    }))
}

fn next_value(args: &mut impl Iterator<Item = String>, option: &str) -> Result<String, Error> {
    args.next()
        .ok_or_else(|| Error::Usage(format!("missing value for {option}")))
}

fn set_input(input: &mut Option<PathBuf>, value: String) -> Result<(), Error> {
    if input.is_some() {
        return Err(Error::Usage("multiple input files provided".to_string()));
    }

    *input = Some(PathBuf::from(value));
    Ok(())
}

fn parse_emit_value(value: &str) -> Result<EmitKind, Error> {
    EmitKind::parse(value).ok_or_else(|| {
        Error::Usage(format!(
            "invalid --emit value `{value}`; expected exe, obj, asm, or llvm-ir"
        ))
    })
}

/// Returns the text following the first `=` in `value`, or an empty string
/// when `value` contains no `=`.
fn value_after_equals(value: &str) -> &str {
    match value.find('=') {
        // `=` is a single byte, so `index + 1` is always a valid boundary.
        Some(index) => &value[index + 1..],
        None => "",
    }
}

/// Complete `--help` text.
///
/// Assembled with `concat!` from one literal per line. A single literal using
/// `\`-newline continuations would not work here: a continuation skips all
/// leading whitespace on the following line, which would strip the
/// indentation of the usage and option lines and misalign the columns.
const HELP_TEXT: &str = concat!(
    "Hypothalamus - Brainfuck AOT compiler with an LLVM backend\n",
    "\n",
    "Usage:\n",
    "  hypothalamus [OPTIONS] <INPUT>\n",
    "\n",
    "Options:\n",
    "  -o, --output <PATH>       Output path. Use '-' with --emit llvm-ir for stdout\n",
    "      --emit <KIND>         exe, obj, asm, or llvm-ir [default: exe]\n",
    "      --target <TRIPLE>     LLVM target triple passed to clang and embedded in IR\n",
    "      --tape-size <CELLS>   Tape cell count [default: 30000]\n",
    "      --cc <PATH>           clang-compatible LLVM driver [default: clang]\n",
    "      --keep-ll             Keep generated LLVM IR beside the output\n",
    "  -h, --help                Print help\n",
    "      --version             Print version\n",
);

/// Prints the usage text to standard output, followed by a blank line.
fn print_help() {
    println!("{}", HELP_TEXT);
}

/// Errors reported by this binary; each carries the message shown to the user.
#[derive(Debug)]
enum Error {
    /// The command line was invalid; `main` prints a `--help` hint and exits with code 2.
    Usage(String),
    /// The work itself failed (I/O, parsing, code generation, clang); `main` exits with
    /// `ExitCode::FAILURE`.
    Failure(String),
}