Skip to main content

relon_codegen_llvm/
cocompile.rs

1//! Stage 1.B — LTO co-compile backbone (closed-world `CallNative`).
2//!
3//! GraalVM-style closed-world native dispatch: when the full host-fn
4//! set is known at emit time (the `build.rs` / `emit_object` path,
5//! *not* the open-world MCJIT / `from_source` path), the host Rust is
6//! compiled to LLVM bitcode, linked into the *same* LLVM module as the
7//! emitted Relon code, and run through LTO / inline so every
8//! `Op::CallNative` collapses from a dynamic
9//! `relon_llvm_call_native` helper hop into an inlined unit-internal
10//! call — exactly what `relon-codegen-cranelift`'s *static*
11//! `cap_lookup -> fn_ptr` arm does, but resolved fully at link time.
12//!
13//! ## Toolchain spike (the highest risk, validated first)
14//!
15//! The host bitcode is produced by **rustc's bundled LLVM**, while the
16//! Relon module is built by the **system LLVM 18.1.3** (`inkwell`'s
17//! `llvm18-1` feature). On this host rustc ships LLVM 22 — a 4-major
18//! skew. Raw `rustc --emit=llvm-bc` embeds a ThinLTO module-summary
19//! whose version (12) the LLVM-18 bitcode reader rejects
20//! (`Invalid summary version 12`), so `link_in_module` cannot consume
21//! it directly.
22//!
23//! The bridge that works: emit **textual** IR (`rustc --emit=llvm-ir`)
24//! and parse it **in-process** with inkwell's LLVM-18 parser
25//! (`Context::create_module_from_ir`). LLVM's textual IR is
26//! forward-compatible enough across this skew that the 18.1.3 parser
27//! accepts rustc-22's `.ll`, yielding an LLVM-18 module the inkwell
28//! module links cleanly — no external `llvm-as-18` binary required.
29//! The host fn is then marked
30//! `alwaysinline` so the O3 pipeline fully inlines it (the rustc
31//! default attribute set — `probe-stack` / `target-cpu` — otherwise
32//! makes the cost-model decline even a trivial single-use call).
33//!
34//! Everything here is gated behind explicit calls; the open-world
35//! MCJIT path (`evaluator.rs`) is untouched and remains the default.
36
37use std::process::Command;
38
39use inkwell::attributes::AttributeLoc;
40use inkwell::context::Context;
41use inkwell::execution_engine::ExecutionEngine;
42use inkwell::memory_buffer::MemoryBuffer;
43use inkwell::module::Module as LlvmModule;
44use inkwell::targets::{
45    CodeModel, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple,
46};
47use inkwell::OptimizationLevel;
48
49use crate::codegen::{emit_module_funcs_closed_world, ConstPool, ENTRY_SYMBOL};
50use crate::error::LlvmError;
51
/// Raw `unsafe extern "C" fn(i64) -> i64` signature of a single-argument
/// closed-world legacy-i64 entry that JIT-runs without the buffer arena
/// handshake. The arity is fixed at one argument; `run_i64` uses this
/// alias to type its JIT symbol lookup.
type EntryArity1 = unsafe extern "C" fn(i64) -> i64;
56
/// Result of a closed-world co-compile: the module IR text captured
/// before linking and after O3 (for inline-count assertions) plus a JIT
/// execution engine kept alive alongside its leaked `Context` so callers
/// can run the entry.
pub struct CocompiledModule {
    /// The post-O3 module IR text. Callers assert against this:
    /// zero `call @relon_llvm_call_native` (open-world helper never
    /// emitted) and zero residual `call @<host_symbol>` (the linked
    /// host fn was inlined).
    pub ir_after_opt: String,
    /// The pre-link / pre-opt module IR text — useful when a test
    /// wants to confirm the direct `call @<host_symbol>` was the shape
    /// emitted before inlining erased it.
    pub ir_before_link: String,
    // The engine's `'static` lifetime parameter is backed by the
    // `Context` that `cocompile_legacy_i64` leaks and never frees, so
    // the context outlives the engine.
    // Rust drops struct fields in declaration order, so this field —
    // declared last — is dropped *after* the two IR strings. That order
    // is not load-bearing: the strings are plain owned `String`s.
    engine: ExecutionEngine<'static>,
}
75
76impl CocompiledModule {
77    /// Run the closed-world legacy-i64 entry with a single i64 arg.
78    ///
79    /// # Safety
80    /// The JIT'd entry is a raw `extern "C" fn(i64) -> i64`; the engine
81    /// owns the code. The caller must have built a single-arg legacy
82    /// entry (the spike fixture does).
83    pub fn run_i64(&self, arg: i64) -> Result<i64, LlvmError> {
84        let f: inkwell::execution_engine::JitFunction<'_, EntryArity1> = unsafe {
85            self.engine
86                .get_function(ENTRY_SYMBOL)
87                .map_err(|e| LlvmError::Codegen(format!("cocompile: entry lookup: {e}")))?
88        };
89        Ok(unsafe { f.call(arg) })
90    }
91}
92
93/// Co-compile a closed-world legacy-i64 IR module against a host shim
94/// crate.
95///
96/// 1. emit the Relon module with `WorldMode::ClosedWorld` so
97///    `Op::CallNative` lowers to a direct `call @<host_symbol>`;
98/// 2. compile `host_shim_src` (a `#[no_mangle] extern "C"` host fn
99///    crate) to textual IR, parsed in-process as an LLVM-18 module;
100/// 3. `link_in_module` the host module into the Relon module;
101/// 4. mark every linked host fn `alwaysinline`;
102/// 5. run the same `default<O3>` pipeline the MCJIT path uses, then
103///    JIT the module.
104///
105/// `ir` must have a legacy-i64 `(i64) -> i64` entry whose body carries
106/// the `Op::CallNative` and an `imports` table naming the host fn.
107pub fn cocompile_legacy_i64(
108    ir: &relon_ir::ir::Module,
109    host_shim_src: &str,
110) -> Result<CocompiledModule, LlvmError> {
111    let entry_idx = ir
112        .entry_func_index
113        .ok_or_else(|| LlvmError::Codegen("cocompile: IR module has no entry function".into()))?;
114    let entry = &ir.funcs[entry_idx];
115
116    // Leak the Context so the engine can hold a `'static` borrow (same
117    // pattern as `LlvmAotEvaluator`).
118    let ctx_box: Box<Context> = Box::new(Context::create());
119    // SAFETY: `ctx_box` lives on the heap and is never freed before the
120    // returned engine; we intentionally leak it.
121    let ctx: &'static Context = unsafe { &*(Box::into_raw(ctx_box) as *const Context) };
122
123    let module = ctx.create_module("relon_llvm_cocompile");
124
125    let const_pool = ConstPool::from_module(ir)?;
126    let helpers: Vec<&relon_ir::ir::Func> = ir
127        .funcs
128        .iter()
129        .enumerate()
130        .filter(|(i, _)| *i != entry_idx)
131        .map(|(_, f)| f)
132        .collect();
133    let helper_ir_indices: Vec<u32> = ir
134        .funcs
135        .iter()
136        .enumerate()
137        .filter(|(i, _)| *i != entry_idx)
138        .map(|(i, _)| i as u32)
139        .collect();
140
141    // Emit with the closed-world flag: `Op::CallNative` -> direct
142    // `call @<host_symbol>`, host fns pre-declared as `extern`.
143    emit_module_funcs_closed_world(
144        ctx,
145        &module,
146        entry,
147        /*buffer_return_size=*/ 0,
148        &const_pool,
149        &helpers,
150        Some(&helper_ir_indices),
151        /*lambdas=*/ &[],
152        /*closure_table=*/ &[],
153        &ir.imports,
154    )?;
155
156    let ir_before_link = module.print_to_string().to_string();
157
158    // Compile + link the host module for every imported host fn, then
159    // force-inline. Shared with the source-driven `emit_object` buffer
160    // path (`evaluator.rs`).
161    link_and_inline_host_shim(&module, host_shim_src, &ir.imports)?;
162
163    run_default_o3_pipeline(&module)?;
164
165    let ir_after_opt = module.print_to_string().to_string();
166
167    let engine = module
168        .create_jit_execution_engine(OptimizationLevel::Aggressive)
169        .map_err(|e| LlvmError::Codegen(format!("cocompile: create JIT engine: {e}")))?;
170
171    Ok(CocompiledModule {
172        ir_after_opt,
173        ir_before_link,
174        engine,
175    })
176}
177
178/// Link a host shim crate's IR into `module` and force-inline
179/// every host fn the `imports` table names.
180///
181/// Shared by both closed-world producers:
182/// - [`cocompile_legacy_i64`] (the hand-built JIT spike fixture);
183/// - `LlvmAotEvaluator::emit_object_with_options` (the source-driven
184///   buffer-protocol object path).
185///
186/// 1. compile `host_shim_src` to textual LLVM IR and parse it in-process
187///    with inkwell (LLVM-18) — the skew bridge (see module docs);
188/// 2. `link_in_module` it into `module`;
189/// 3. stamp `alwaysinline` on every imported host fn that arrived with
190///    a body, so the subsequent O3 pass folds the direct
191///    `call @<host_symbol>` sites into their callers (rustc's default
192///    attribute set otherwise makes the cost-model decline even a
193///    trivial single-use call).
194///
195/// The caller runs the O3 / LTO pipeline afterwards. A host fn the
196/// shim never defined stays an unresolved declaration; that surfaces
197/// downstream (JIT symbol lookup / linker) rather than here.
198pub(crate) fn link_and_inline_host_shim(
199    module: &LlvmModule<'_>,
200    host_shim_src: &str,
201    imports: &[relon_ir::ir::NativeImport],
202) -> Result<(), LlvmError> {
203    link_and_inline_host_shim_for_target(module, host_shim_src, imports, HostShimTarget::Native)
204}
205
/// Which target the host shim is compiled for. It selects the rustc
/// flags used when the shim source is turned into textual LLVM IR, so
/// the linked-in host body matches the pointer width of the module it
/// is linked into.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum HostShimTarget {
    /// Compile for rustc's default (host) target: no `--target` flag is
    /// passed, so the emitted IR carries the host triple / DataLayout.
    Native,
    /// Compile with `--target wasm32-unknown-unknown`, producing IR with
    /// the wasm32 `p:32:32` DataLayout so it links into the relon wasm32
    /// module.
    Wasm32,
}
216
217/// Wasm32 sibling of [`link_and_inline_host_shim`]: compile the host
218/// shim with `rustc --target wasm32-unknown-unknown --emit=llvm-ir`, parse
219/// the textual IR in-process (LLVM-18), and `link_in_module` it into a
220/// relon **wasm32** module so a pure-compute host fn the `imports` table
221/// names gets force-inlined into the wasm unit instead of routed across a
222/// WASI import boundary.
223///
224/// ## wasm32 spike result (validated)
225///
226/// rustc-wasm32 textual IR carries `target triple = "wasm32-unknown-unknown"`
227/// and `target datalayout = "e-m:e-p:32:32-…-ni:1:10:20"`. The relon wasm
228/// module pins `wasm32-wasi`. `link_in_module` tolerates the triple
229/// mismatch (LLVM treats a triple disagreement as a warning, not an
230/// error) and the DataLayouts are compatible (both little-endian,
231/// `p:32:32`), so the inkwell LLVM-18 parser accepts the rustc-22 `.ll`
232/// and the post-O3 wasm32 pipeline inlines the host body — exactly the
233/// native bridge, retargeted. The one residual skew (shared with native):
234/// a host fn whose return value LLVM can range-narrow emits a
235/// `range(iN …)` return attribute the LLVM-18 parser rejects; that
236/// surfaces as a `parse host textual IR` error rather than a silent
237/// miscompile.
238/// wasm closed-world host-shim co-compile that only inlines the
239/// **pure-compute** host fns. `effectful[i] == true` marks import index
240/// `i` as effectful (capability-gated) — it stays a `wasm import` and is
241/// *not* force-inlined even if the shim happens to carry a body for it.
242///
243/// The contract for the caller (`emit_object_for_target`): the wasm
244/// closed-world `host_shim_src` should define **only** the pure host fns;
245/// an effectful fn's implementation lives in the trusted host outside the
246/// sandbox (supplied by wasmtime's `Linker` at instantiation). This entry
247/// just makes the inline set explicit so a pure-only shim is the norm.
248pub(crate) fn link_and_inline_host_shim_wasm_pure_only(
249    module: &LlvmModule<'_>,
250    host_shim_src: &str,
251    imports: &[relon_ir::ir::NativeImport],
252    effectful: &[bool],
253) -> Result<(), LlvmError> {
254    let ctx = module.get_context();
255    let host_ll = compile_host_shim_to_textual_ir(host_shim_src, HostShimTarget::Wasm32)?;
256    let buffer = MemoryBuffer::create_from_file(&host_ll)
257        .map_err(|e| LlvmError::Codegen(format!("cocompile: read host wasm .ll: {e}")))?;
258    let host_module = ctx
259        .create_module_from_ir(buffer)
260        .map_err(|e| LlvmError::Codegen(format!("cocompile: parse host wasm textual IR: {e}")))?;
261    module
262        .link_in_module(host_module)
263        .map_err(|e| LlvmError::Codegen(format!("cocompile: wasm link_in_module: {e}")))?;
264
265    let always_inline = ctx.create_enum_attribute(
266        inkwell::attributes::Attribute::get_named_enum_kind_id("alwaysinline"),
267        0,
268    );
269    for (idx, import) in imports.iter().enumerate() {
270        if effectful.get(idx).copied().unwrap_or(false) {
271            // Effectful: keep the wasm import boundary; never inline.
272            continue;
273        }
274        if let Some(host_fn) = module.get_function(&import.name) {
275            if host_fn.get_first_basic_block().is_some() {
276                host_fn.add_attribute(AttributeLoc::Function, always_inline);
277            }
278        }
279    }
280    Ok(())
281}
282
283fn link_and_inline_host_shim_for_target(
284    module: &LlvmModule<'_>,
285    host_shim_src: &str,
286    imports: &[relon_ir::ir::NativeImport],
287    target: HostShimTarget,
288) -> Result<(), LlvmError> {
289    let ctx = module.get_context();
290    let host_ll = compile_host_shim_to_textual_ir(host_shim_src, target)?;
291    // In-process LLVM-18 parse of rustc's textual IR: no external
292    // `llvm-as-18` binary, no rustc-bitcode summary-version skew. LLVM's
293    // textual IR is forward-compatible enough across the rustc/system
294    // LLVM major gap that the 18.1.3 parser accepts rustc's `.ll`.
295    let buffer = MemoryBuffer::create_from_file(&host_ll)
296        .map_err(|e| LlvmError::Codegen(format!("cocompile: read host .ll: {e}")))?;
297    let host_module = ctx
298        .create_module_from_ir(buffer)
299        .map_err(|e| LlvmError::Codegen(format!("cocompile: parse host textual IR: {e}")))?;
300    module
301        .link_in_module(host_module)
302        .map_err(|e| LlvmError::Codegen(format!("cocompile: link_in_module: {e}")))?;
303
304    let always_inline = ctx.create_enum_attribute(
305        inkwell::attributes::Attribute::get_named_enum_kind_id("alwaysinline"),
306        0,
307    );
308    for import in imports {
309        if let Some(host_fn) = module.get_function(&import.name) {
310            if host_fn.get_first_basic_block().is_some() {
311                host_fn.add_attribute(AttributeLoc::Function, always_inline);
312            }
313        }
314    }
315    Ok(())
316}
317
318/// Compile a host shim Rust source to textual LLVM IR.
319///
320/// The skew bridge (see module docs): emit textual IR with rustc and
321/// hand it straight to inkwell's in-process LLVM-18 parser
322/// (`Context::create_module_from_ir`). Textual IR is forward-compatible
323/// enough across the rustc/system-LLVM major gap that the 18.1.3 parser
324/// accepts it — no external assembler, no bitcode summary-version skew.
325/// The returned path is a `.ll` the caller reads via `MemoryBuffer`.
326fn compile_host_shim_to_textual_ir(
327    host_shim_src: &str,
328    target: HostShimTarget,
329) -> Result<std::path::PathBuf, LlvmError> {
330    // Per-invocation unique dir: PID alone collides when two
331    // co-compiles run on the same process (concurrent test threads, or
332    // a JIT + object emit in one build), racing on `host_shim.ll`.
333    static SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
334    let seq = SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
335    let dir = std::env::temp_dir().join(format!("relon_cocompile_{}_{seq}", std::process::id()));
336    std::fs::create_dir_all(&dir)
337        .map_err(|e| LlvmError::Codegen(format!("cocompile: mkdir tmp: {e}")))?;
338    let rs_path = dir.join("host_shim.rs");
339    let ll_path = dir.join("host_shim.ll");
340    std::fs::write(&rs_path, host_shim_src)
341        .map_err(|e| LlvmError::Codegen(format!("cocompile: write shim: {e}")))?;
342
343    // 1. rustc --emit=llvm-ir (textual): decouples from rustc's bitcode
344    //    binary format / ThinLTO summary version.
345    let mut args: Vec<&str> = vec![
346        "--emit=llvm-ir",
347        "--crate-type=cdylib",
348        "-O",
349        // Single codegen unit so `--emit=llvm-ir` writes one
350        // `host_shim.ll` rather than per-CGU `*.rcgu.0.ll` shards
351        // it then fails to merge under `-o`.
352        "-Ccodegen-units=1",
353    ];
354    // wasm32 retarget: the host body must come out with the wasm32
355    // `p:32:32` DataLayout / triple so it links into the relon wasm32
356    // module (see `link_and_inline_host_shim_wasm` docs).
357    if matches!(target, HostShimTarget::Wasm32) {
358        args.push("--target");
359        args.push("wasm32-unknown-unknown");
360    }
361    args.push(rs_path.to_str().unwrap());
362    args.push("-o");
363    args.push(ll_path.to_str().unwrap());
364    let rustc = Command::new("rustc")
365        .args(&args)
366        .output()
367        .map_err(|e| LlvmError::Codegen(format!("cocompile: spawn rustc: {e}")))?;
368    if !rustc.status.success() {
369        return Err(LlvmError::Codegen(format!(
370            "cocompile: rustc --emit=llvm-ir failed: {}",
371            String::from_utf8_lossy(&rustc.stderr)
372        )));
373    }
374
375    // The textual `.ll` is consumed in-process by inkwell's LLVM-18
376    // parser; no external `llvm-as-18` assembly step.
377    Ok(ll_path)
378}
379
380/// Run the same `default<O3>` middle-end pipeline the MCJIT path uses
381/// (`evaluator.rs::run_default_o3_pipeline`). Re-implemented here
382/// because that one is private to `evaluator.rs`; the knobs are
383/// identical so the optimized shape matches.
384fn run_default_o3_pipeline(module: &LlvmModule<'_>) -> Result<(), LlvmError> {
385    Target::initialize_native(&InitializationConfig::default())
386        .map_err(|e| LlvmError::Codegen(format!("cocompile: initialize_native: {e}")))?;
387    let triple_str = TargetMachine::get_default_triple();
388    let target = Target::from_triple(&triple_str)
389        .map_err(|e| LlvmError::Codegen(format!("cocompile: target from_triple: {e}")))?;
390    let cpu = TargetMachine::get_host_cpu_name();
391    let features = TargetMachine::get_host_cpu_features();
392    let triple = TargetTriple::create(
393        triple_str
394            .as_str()
395            .to_str()
396            .map_err(|e| LlvmError::Codegen(format!("cocompile: triple utf8: {e}")))?,
397    );
398    let machine = target
399        .create_target_machine(
400            &triple,
401            cpu.to_str().unwrap_or(""),
402            features.to_str().unwrap_or(""),
403            OptimizationLevel::Aggressive,
404            RelocMode::Default,
405            CodeModel::JITDefault,
406        )
407        .ok_or_else(|| LlvmError::Codegen("cocompile: create_target_machine null".into()))?;
408    let opts = inkwell::passes::PassBuilderOptions::create();
409    module
410        .run_passes("default<O3>", &machine, opts)
411        .map_err(|e| LlvmError::Codegen(format!("cocompile: run_passes O3: {e}")))?;
412    Ok(())
413}