arm64jit 0.3.7

Spec-driven AArch64 JIT assembler for Rust
Documentation
use std::collections::{BTreeMap, BTreeSet};
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};

use jit_codegen::{collect_conditional_branch_alias_rules, generate_encoder_module};
use jit_spec::{FlatInstruction, flatten_instruction_set, parse_instructions_json_file};
use serde::Deserialize;

// Standard 64-bit FNV-1a parameters; `compute_cache_key` seeds its hash with the
// offset basis and `fnv1a_update` multiplies by the prime after every byte.
const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

/// One alias rule as deserialized from `spec/alias_rules.json`.
#[derive(Debug, Clone, Deserialize)]
struct AliasRuleSpec {
    /// Alias mnemonic. `load_alias_rules` sorts and de-duplicates on this field,
    /// and the generated `lookup_alias_rule` searches the table by it.
    alias: String,
    /// Mnemonic the alias resolves to. It must be present in the mnemonic-id map
    /// (exactly, or after ASCII lowercasing) or generation fails.
    canonical: String,
    /// Snake-case transform name; mapped to an `AliasTransform` variant by
    /// `transform_variant_name`.
    transform: String,
    /// Optional value copied into the generated table; `-1` is emitted when the
    /// field is absent from the JSON.
    #[serde(default)]
    fixed_imms: Option<i16>,
}

/// Build-script entry point: delegates to [`run`] and panics with the error
/// message if generation fails, which makes Cargo abort the build.
fn main() {
    run().unwrap_or_else(|err| panic!("failed generating jit specs: {err}"));
}

/// Generates `generated_specs.rs` and `generated_alias_rules.rs` into `OUT_DIR`.
///
/// The tracked inputs (the two spec JSON files, this build script and two
/// sibling-crate sources) are registered with Cargo via `rerun-if-changed` and
/// hashed into a cache key. When a cache directory for that key already holds
/// both outputs they are copied into `OUT_DIR` and generation is skipped.
///
/// The cache is purely an optimisation: a failure to restore from it falls back
/// to regenerating, and a failure to persist into it is reported as a Cargo
/// warning instead of failing a build whose outputs were already written.
///
/// # Errors
///
/// Returns an error when `CARGO_MANIFEST_DIR` / `OUT_DIR` are unset, when the
/// cache key cannot be computed, when parsing or code generation fails, or when
/// the outputs cannot be written to `OUT_DIR`.
fn run() -> Result<(), Box<dyn std::error::Error>> {
    let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
    let out_dir = PathBuf::from(env::var("OUT_DIR")?);

    let instructions_json = manifest_dir.join("spec").join("Instructions.json");
    let alias_rules_json = manifest_dir.join("spec").join("alias_rules.json");
    let build_rs = manifest_dir.join("build.rs");
    let codegen_core = manifest_dir
        .join("..")
        .join("jit-codegen")
        .join("src")
        .join("core.rs");
    let spec_lib = manifest_dir
        .join("..")
        .join("jit-spec")
        .join("src")
        .join("lib.rs");

    let tracked_inputs = [
        instructions_json.clone(),
        alias_rules_json.clone(),
        build_rs,
        codegen_core,
        spec_lib,
    ];
    for path in &tracked_inputs {
        println!("cargo:rerun-if-changed={}", path.display());
    }

    let cache_key = compute_cache_key(&tracked_inputs)?;
    let cache_dir = build_cache_root(&out_dir, "jit").join(cache_key);
    let output_files = ["generated_specs.rs", "generated_alias_rules.rs"];

    match restore_cached_outputs(&cache_dir, &out_dir, &output_files) {
        Ok(true) => return Ok(()),
        Ok(false) => {}
        // An unreadable cache entry must not block the build; regenerate below.
        // If OUT_DIR itself is the problem, the writes further down surface it.
        Err(err) => println!(
            "cargo:warning=ignoring unusable jit build cache at {}: {err}",
            cache_dir.display()
        ),
    }

    let doc = parse_instructions_json_file(&instructions_json)?;
    let flat = flatten_instruction_set(&doc, "A64")?;

    let generated = generate_encoder_module(&flat)?;
    let conditional_branch_alias_rules = collect_conditional_branch_alias_rules(&flat)?;
    let alias_rules = load_alias_rules(&alias_rules_json)?;
    let mnemonic_ids = build_mnemonic_id_map(&flat)?;
    let generated_alias_rules =
        generate_alias_rules_module(&alias_rules, &conditional_branch_alias_rules, &mnemonic_ids)?;

    fs::write(out_dir.join(output_files[0]), &generated)?;
    fs::write(out_dir.join(output_files[1]), &generated_alias_rules)?;

    // The outputs are already in OUT_DIR, so a cache write failure only costs a
    // regeneration next time; report it without failing the build.
    if let Err(err) = persist_cached_outputs(&cache_dir, &out_dir, &output_files) {
        println!(
            "cargo:warning=failed to persist jit build cache at {}: {err}",
            cache_dir.display()
        );
    }
    Ok(())
}

/// Reads the alias-rule JSON at `path` and returns the rules ordered by alias
/// with at most one rule per alias.
///
/// # Errors
///
/// Fails when the file cannot be read or is not a JSON array of rule objects.
fn load_alias_rules(path: &Path) -> Result<Vec<AliasRuleSpec>, Box<dyn std::error::Error>> {
    let raw = fs::read_to_string(path)?;
    let mut parsed = serde_json::from_str::<Vec<AliasRuleSpec>>(&raw)?;
    // The sort is stable, so among rules sharing an alias the one that appears
    // first in the file stays first and is the one `dedup_by` keeps.
    parsed.sort_by(|a, b| Ord::cmp(&a.alias, &b.alias));
    parsed.dedup_by(|candidate, kept| candidate.alias == kept.alias);
    Ok(parsed)
}

/// Maps a snake-case transform name from the alias-rule JSON to the identifier
/// of the corresponding `AliasTransform` variant in the generated code.
///
/// # Errors
///
/// Returns `unknown alias transform "<name>"` for any name not in the table.
fn transform_variant_name(name: &str) -> Result<&'static str, Box<dyn std::error::Error>> {
    // (JSON spelling, generated variant identifier)
    const VARIANTS: &[(&str, &str)] = &[
        ("pure_rename", "PureRename"),
        ("ret_default", "RetDefault"),
        ("cmp_like", "CmpLike"),
        ("cmn_like", "CmnLike"),
        ("tst_like", "TstLike"),
        ("mov_like", "MovLike"),
        ("mvn_like", "MvnLike"),
        ("cinc_like", "CincLike"),
        ("cset_like", "CsetLike"),
        ("cneg_like", "CnegLike"),
        ("bitfield_bfi", "BitfieldBfi"),
        ("bitfield_bfxil", "BitfieldBfxil"),
        ("bitfield_bfc", "BitfieldBfc"),
        ("bitfield_ubfx", "BitfieldUbfx"),
        ("bitfield_sbfx", "BitfieldSbfx"),
        ("bitfield_sbfiz", "BitfieldSbfiz"),
        ("bitfield_extract_fixed", "BitfieldExtractFixed"),
        ("extend_long_zero", "ExtendLongZero"),
        ("stsetl_like", "StsetlLike"),
        ("dc_like", "DcLike"),
    ];

    VARIANTS
        .iter()
        .find(|(key, _)| *key == name)
        .map(|&(_, variant)| variant)
        .ok_or_else(|| format!("unknown alias transform {name:?}").into())
}

/// Assigns a `u16` id to every distinct mnemonic in `flat`.
///
/// Ids are the zero-based positions of the mnemonics in ascending string order,
/// so the mapping depends only on the set of mnemonics, not on their order or
/// multiplicity in `flat`.
///
/// # Errors
///
/// Fails when there are more distinct mnemonics than a `u16` can index.
fn build_mnemonic_id_map(
    flat: &[FlatInstruction],
) -> Result<BTreeMap<String, u16>, Box<dyn std::error::Error>> {
    let unique: BTreeSet<&str> = flat.iter().map(|inst| inst.mnemonic.as_str()).collect();
    if unique.len() > usize::from(u16::MAX) + 1 {
        return Err("too many mnemonics to fit in u16".into());
    }
    // The length check above guarantees every position fits in `u16`.
    Ok(unique
        .into_iter()
        .enumerate()
        .map(|(id, mnemonic)| (mnemonic.to_owned(), id as u16))
        .collect())
}

/// Renders the Rust source for the generated alias tables and their lookups.
///
/// The output contains two static tables, `ALIAS_RULES` and
/// `CONDITIONAL_BRANCH_ALIAS_RULES`, each followed by a lookup function that
/// binary-searches its table by alias. Because of that, both tables are emitted
/// in ascending alias order regardless of the order of the inputs; input that
/// is already sorted produces the same text as before.
///
/// * `rules` - alias rules loaded from the JSON spec.
/// * `conditional_branch_alias_rules` - `(alias, base mnemonic, condition code)`
///   triples.
/// * `mnemonic_ids` - mnemonic-to-id map used to resolve canonical and base
///   mnemonics; a name is tried verbatim first and then ASCII-lowercased.
///
/// # Errors
///
/// Fails on an unknown transform name, or when a canonical / base mnemonic has
/// no id in `mnemonic_ids`.
fn generate_alias_rules_module(
    rules: &[AliasRuleSpec],
    conditional_branch_alias_rules: &[(String, String, u8)],
    mnemonic_ids: &BTreeMap<String, u16>,
) -> Result<String, Box<dyn std::error::Error>> {
    // Shared lookup: exact spelling first, then the lowercase spelling.
    let resolve_id = |mnemonic: &str| -> Option<u16> {
        mnemonic_ids.get(mnemonic).copied().or_else(|| {
            mnemonic_ids
                .get(mnemonic.to_ascii_lowercase().as_str())
                .copied()
        })
    };

    // The generated `binary_search_by` calls compare `rule.alias` as `str`, so
    // sort with the same ordering. Stable sorts keep the relative order of
    // entries that share an alias.
    let mut sorted_rules: Vec<&AliasRuleSpec> = rules.iter().collect();
    sorted_rules.sort_by(|lhs, rhs| lhs.alias.as_str().cmp(rhs.alias.as_str()));
    let mut sorted_conditional: Vec<&(String, String, u8)> =
        conditional_branch_alias_rules.iter().collect();
    sorted_conditional.sort_by(|lhs, rhs| lhs.0.as_str().cmp(rhs.0.as_str()));

    let mut out = String::new();
    out.push_str("// @generated by jit/build.rs. DO NOT EDIT.\n");
    out.push_str("pub(crate) static ALIAS_RULES: &[AliasRule] = &[\n");
    for rule in sorted_rules {
        let transform = transform_variant_name(&rule.transform)?;
        let canonical_id = resolve_id(rule.canonical.as_str()).ok_or_else(|| {
            format!(
                "unknown canonical mnemonic in alias rules: {}",
                rule.canonical
            )
        })?;
        out.push_str("    AliasRule {\n");
        out.push_str(&format!("        alias: {:?},\n", rule.alias));
        out.push_str(&format!("        canonical: {:?},\n", rule.canonical));
        out.push_str(&format!("        canonical_id: {canonical_id},\n"));
        out.push_str(&format!(
            "        transform: AliasTransform::{transform},\n"
        ));
        // `-1` is what the generated table carries when the rule has no value.
        out.push_str(&format!(
            "        fixed_imms: {},\n",
            rule.fixed_imms.unwrap_or(-1)
        ));
        out.push_str("    },\n");
    }
    out.push_str("];\n\n");
    out.push_str(
        "pub(crate) fn lookup_alias_rule(mnemonic: &str) -> Option<&'static AliasRule> {\n",
    );
    out.push_str("    let idx = ALIAS_RULES\n");
    out.push_str("        .binary_search_by(|rule| rule.alias.cmp(mnemonic))\n");
    out.push_str("        .ok()?;\n");
    out.push_str("    Some(&ALIAS_RULES[idx])\n");
    out.push_str("}\n");
    out.push_str("\n");
    out.push_str(
        "pub(crate) static CONDITIONAL_BRANCH_ALIAS_RULES: &[ConditionalBranchAliasRule] = &[\n",
    );
    for (alias, base_mnemonic, condition_code) in sorted_conditional {
        let base_mnemonic_id = resolve_id(base_mnemonic.as_str()).ok_or_else(|| {
            format!("unknown base mnemonic in conditional alias rules: {base_mnemonic}")
        })?;
        out.push_str("    ConditionalBranchAliasRule {\n");
        out.push_str(&format!("        alias: {:?},\n", alias));
        out.push_str(&format!("        base_mnemonic: {:?},\n", base_mnemonic));
        out.push_str(&format!("        base_mnemonic_id: {base_mnemonic_id},\n"));
        out.push_str(&format!("        condition_code: {condition_code},\n"));
        out.push_str("    },\n");
    }
    out.push_str("];\n\n");
    out.push_str(
        "pub(crate) fn lookup_conditional_branch_alias(mnemonic: &str) -> Option<&'static ConditionalBranchAliasRule> {\n",
    );
    out.push_str("    let idx = CONDITIONAL_BRANCH_ALIAS_RULES\n");
    out.push_str("        .binary_search_by(|rule| rule.alias.cmp(mnemonic))\n");
    out.push_str("        .ok()?;\n");
    out.push_str("    Some(&CONDITIONAL_BRANCH_ALIAS_RULES[idx])\n");
    out.push_str("}\n");
    Ok(out)
}

/// Folds `bytes` into a running FNV-1a hash and returns the new state.
///
/// Each byte is XOR-ed into the state, which is then multiplied by `FNV_PRIME`
/// with wrapping arithmetic. An empty slice returns `state` unchanged.
fn fnv1a_update(state: u64, bytes: &[u8]) -> u64 {
    bytes
        .iter()
        .fold(state, |hash, &byte| (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME))
}

/// Hashes the tracked input files into a 16-digit lowercase hex cache key.
///
/// For every path, in order, the hash absorbs the path bytes, a `0xff`
/// separator, the file contents and a `0x00` terminator.
///
/// Some tracked inputs live outside the package directory and exist only to
/// invalidate the cache, so a path that does not exist is not fatal: it
/// contributes a `0x01` "absent" marker in place of contents and terminator.
/// The key therefore still changes when such a file appears or disappears.
///
/// # Errors
///
/// Fails when a path exists but cannot be read; the message names the path.
fn compute_cache_key(paths: &[PathBuf]) -> Result<String, Box<dyn std::error::Error>> {
    let mut state = FNV_OFFSET_BASIS;
    for path in paths {
        state = fnv1a_update(state, path.as_os_str().as_encoded_bytes());
        state = fnv1a_update(state, &[0xff]);
        match fs::read(path) {
            Ok(bytes) => {
                state = fnv1a_update(state, &bytes);
                state = fnv1a_update(state, &[0x00]);
            }
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                // Distinct from the `0x00` that ends an existing (even empty) file.
                state = fnv1a_update(state, &[0x01]);
            }
            Err(err) => {
                return Err(format!(
                    "failed to read tracked input {}: {err}",
                    path.display()
                )
                .into());
            }
        }
    }
    Ok(format!("{state:016x}"))
}

/// Returns the directory under which cache entries for `bucket` are stored.
///
/// The cache is anchored at the closest ancestor of `out_dir` (including
/// `out_dir` itself) whose final component is `target`; when there is none it
/// is anchored at `out_dir`. The result is `<anchor>/.jit-build-cache/<bucket>`.
fn build_cache_root(out_dir: &Path, bucket: &str) -> PathBuf {
    let target_name = OsStr::new("target");
    let anchor = out_dir
        .ancestors()
        .find(|dir| dir.file_name() == Some(target_name))
        .unwrap_or(out_dir);
    anchor.join(".jit-build-cache").join(bucket)
}

/// Copies previously cached outputs from `cache_dir` into `out_dir`.
///
/// Returns `Ok(false)` without touching `out_dir` unless `cache_dir` is a
/// directory containing every file named in `output_files`. Otherwise creates
/// `out_dir` if needed, copies each file across and returns `Ok(true)`.
///
/// # Errors
///
/// Fails when `out_dir` cannot be created or a copy fails.
fn restore_cached_outputs(
    cache_dir: &Path,
    out_dir: &Path,
    output_files: &[&str],
) -> Result<bool, Box<dyn std::error::Error>> {
    let cache_is_complete = cache_dir.is_dir()
        && output_files
            .iter()
            .all(|name| cache_dir.join(name).is_file());
    if !cache_is_complete {
        return Ok(false);
    }

    fs::create_dir_all(out_dir)?;
    output_files
        .iter()
        .try_for_each(|name| fs::copy(cache_dir.join(name), out_dir.join(name)).map(drop))?;
    Ok(true)
}

/// Stores the generated outputs from `out_dir` in `cache_dir` for later reuse.
///
/// `cache_dir` is created if needed. Each file is first copied to a
/// process-specific staging name inside `cache_dir` and then renamed to its
/// final name, so a reader checking for the final name never observes a
/// partially written file, even if this process is interrupted or another
/// build persists the same entry concurrently.
///
/// # Errors
///
/// Fails when the cache directory cannot be created or a file cannot be copied
/// or renamed; the staging file is removed on a best-effort basis.
fn persist_cached_outputs(
    cache_dir: &Path,
    out_dir: &Path,
    output_files: &[&str],
) -> Result<(), Box<dyn std::error::Error>> {
    fs::create_dir_all(cache_dir)?;
    for name in output_files {
        let dest = cache_dir.join(name);
        // Same directory as `dest`, so the rename stays on one filesystem.
        let mut staged = dest.clone().into_os_string();
        staged.push(format!(".tmp-{}", std::process::id()));
        let staged = PathBuf::from(staged);

        let published =
            fs::copy(out_dir.join(name), &staged).and_then(|_| fs::rename(&staged, &dest));
        if let Err(err) = published {
            // Don't leave staging debris behind; the original error is what matters.
            let _ = fs::remove_file(&staged);
            return Err(err.into());
        }
    }
    Ok(())
}