relon_codegen_llvm/cocompile.rs
1//! Stage 1.B — LTO co-compile backbone (closed-world `CallNative`).
2//!
3//! GraalVM-style closed-world native dispatch: when the full host-fn
4//! set is known at emit time (the `build.rs` / `emit_object` path,
5//! *not* the open-world MCJIT / `from_source` path), the host Rust is
6//! compiled to LLVM bitcode, linked into the *same* LLVM module as the
7//! emitted Relon code, and run through LTO / inline so every
8//! `Op::CallNative` collapses from a dynamic
9//! `relon_llvm_call_native` helper hop into an inlined unit-internal
10//! call — exactly what `relon-codegen-cranelift`'s *static*
11//! `cap_lookup -> fn_ptr` arm does, but resolved fully at link time.
12//!
13//! ## Toolchain spike (the highest risk, validated first)
14//!
15//! The host bitcode is produced by **rustc's bundled LLVM**, while the
16//! Relon module is built by the **system LLVM 18.1.3** (`inkwell`'s
17//! `llvm18-1` feature). On this host rustc ships LLVM 22 — a 4-major
18//! skew. Raw `rustc --emit=llvm-bc` embeds a ThinLTO module-summary
19//! whose version (12) the LLVM-18 bitcode reader rejects
20//! (`Invalid summary version 12`), so `link_in_module` cannot consume
21//! it directly.
22//!
23//! The bridge that works: emit **textual** IR (`rustc --emit=llvm-ir`)
24//! and parse it **in-process** with inkwell's LLVM-18 parser
25//! (`Context::create_module_from_ir`). LLVM's textual IR is
26//! forward-compatible enough across this skew that the 18.1.3 parser
27//! accepts rustc-22's `.ll`, yielding an LLVM-18 module the inkwell
28//! module links cleanly — no external `llvm-as-18` binary required.
29//! The host fn is then marked
30//! `alwaysinline` so the O3 pipeline fully inlines it (the rustc
31//! default attribute set — `probe-stack` / `target-cpu` — otherwise
32//! makes the cost-model decline even a trivial single-use call).
33//!
34//! Everything here is gated behind explicit calls; the open-world
35//! MCJIT path (`evaluator.rs`) is untouched and remains the default.
36
37use std::process::Command;
38
39use inkwell::attributes::AttributeLoc;
40use inkwell::context::Context;
41use inkwell::execution_engine::ExecutionEngine;
42use inkwell::memory_buffer::MemoryBuffer;
43use inkwell::module::Module as LlvmModule;
44use inkwell::targets::{
45 CodeModel, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple,
46};
47use inkwell::OptimizationLevel;
48
49use crate::codegen::{emit_module_funcs_closed_world, ConstPool, ENTRY_SYMBOL};
50use crate::error::LlvmError;
51
/// Raw `unsafe extern "C" fn(i64) -> i64` entry signature for a
/// closed-world legacy-i64 entry that JIT-runs without the buffer
/// arena handshake. The arity is fixed at exactly one `i64` argument;
/// this is the type `CocompiledModule::run_i64` resolves the entry
/// symbol as.
type EntryArity1 = unsafe extern "C" fn(i64) -> i64;
56
/// Result of a closed-world co-compile: the pre-link and post-O3 module
/// IR text (for inline-count assertions) plus a JIT execution engine
/// kept alive alongside its leaked `Context` so callers can run the
/// entry.
pub struct CocompiledModule {
    /// The post-O3 module IR text. Callers assert against this:
    /// zero `call @relon_llvm_call_native` (open-world helper never
    /// emitted) and zero residual `call @<host_symbol>` (the linked
    /// host fn was inlined).
    pub ir_after_opt: String,
    /// The pre-link / pre-opt module IR text — useful when a test
    /// wants to confirm the direct `call @<host_symbol>` was the shape
    /// emitted before inlining erased it.
    pub ir_before_link: String,
    // The engine borrows the module, which borrows the leaked `Context`.
    // Because that `Context` is never freed, the `'static` lifetime is
    // sound for as long as the engine lives. Note that Rust drops struct
    // fields in declaration order, so this last field is dropped *last*;
    // the order is immaterial here because the other fields are plain
    // `String`s that hold no LLVM resources.
    engine: ExecutionEngine<'static>,
}
75
76impl CocompiledModule {
77 /// Run the closed-world legacy-i64 entry with a single i64 arg.
78 ///
79 /// # Safety
80 /// The JIT'd entry is a raw `extern "C" fn(i64) -> i64`; the engine
81 /// owns the code. The caller must have built a single-arg legacy
82 /// entry (the spike fixture does).
83 pub fn run_i64(&self, arg: i64) -> Result<i64, LlvmError> {
84 let f: inkwell::execution_engine::JitFunction<'_, EntryArity1> = unsafe {
85 self.engine
86 .get_function(ENTRY_SYMBOL)
87 .map_err(|e| LlvmError::Codegen(format!("cocompile: entry lookup: {e}")))?
88 };
89 Ok(unsafe { f.call(arg) })
90 }
91}
92
93/// Co-compile a closed-world legacy-i64 IR module against a host shim
94/// crate.
95///
96/// 1. emit the Relon module with `WorldMode::ClosedWorld` so
97/// `Op::CallNative` lowers to a direct `call @<host_symbol>`;
98/// 2. compile `host_shim_src` (a `#[no_mangle] extern "C"` host fn
99/// crate) to textual IR, parsed in-process as an LLVM-18 module;
100/// 3. `link_in_module` the host module into the Relon module;
101/// 4. mark every linked host fn `alwaysinline`;
102/// 5. run the same `default<O3>` pipeline the MCJIT path uses, then
103/// JIT the module.
104///
105/// `ir` must have a legacy-i64 `(i64) -> i64` entry whose body carries
106/// the `Op::CallNative` and an `imports` table naming the host fn.
107pub fn cocompile_legacy_i64(
108 ir: &relon_ir::ir::Module,
109 host_shim_src: &str,
110) -> Result<CocompiledModule, LlvmError> {
111 let entry_idx = ir
112 .entry_func_index
113 .ok_or_else(|| LlvmError::Codegen("cocompile: IR module has no entry function".into()))?;
114 let entry = &ir.funcs[entry_idx];
115
116 // Leak the Context so the engine can hold a `'static` borrow (same
117 // pattern as `LlvmAotEvaluator`).
118 let ctx_box: Box<Context> = Box::new(Context::create());
119 // SAFETY: `ctx_box` lives on the heap and is never freed before the
120 // returned engine; we intentionally leak it.
121 let ctx: &'static Context = unsafe { &*(Box::into_raw(ctx_box) as *const Context) };
122
123 let module = ctx.create_module("relon_llvm_cocompile");
124
125 let const_pool = ConstPool::from_module(ir)?;
126 let helpers: Vec<&relon_ir::ir::Func> = ir
127 .funcs
128 .iter()
129 .enumerate()
130 .filter(|(i, _)| *i != entry_idx)
131 .map(|(_, f)| f)
132 .collect();
133 let helper_ir_indices: Vec<u32> = ir
134 .funcs
135 .iter()
136 .enumerate()
137 .filter(|(i, _)| *i != entry_idx)
138 .map(|(i, _)| i as u32)
139 .collect();
140
141 // Emit with the closed-world flag: `Op::CallNative` -> direct
142 // `call @<host_symbol>`, host fns pre-declared as `extern`.
143 emit_module_funcs_closed_world(
144 ctx,
145 &module,
146 entry,
147 /*buffer_return_size=*/ 0,
148 &const_pool,
149 &helpers,
150 Some(&helper_ir_indices),
151 /*lambdas=*/ &[],
152 /*closure_table=*/ &[],
153 &ir.imports,
154 )?;
155
156 let ir_before_link = module.print_to_string().to_string();
157
158 // Compile + link the host module for every imported host fn, then
159 // force-inline. Shared with the source-driven `emit_object` buffer
160 // path (`evaluator.rs`).
161 link_and_inline_host_shim(&module, host_shim_src, &ir.imports)?;
162
163 run_default_o3_pipeline(&module)?;
164
165 let ir_after_opt = module.print_to_string().to_string();
166
167 let engine = module
168 .create_jit_execution_engine(OptimizationLevel::Aggressive)
169 .map_err(|e| LlvmError::Codegen(format!("cocompile: create JIT engine: {e}")))?;
170
171 Ok(CocompiledModule {
172 ir_after_opt,
173 ir_before_link,
174 engine,
175 })
176}
177
178/// Link a host shim crate's IR into `module` and force-inline
179/// every host fn the `imports` table names.
180///
181/// Shared by both closed-world producers:
182/// - [`cocompile_legacy_i64`] (the hand-built JIT spike fixture);
183/// - `LlvmAotEvaluator::emit_object_with_options` (the source-driven
184/// buffer-protocol object path).
185///
186/// 1. compile `host_shim_src` to textual LLVM IR and parse it in-process
187/// with inkwell (LLVM-18) — the skew bridge (see module docs);
188/// 2. `link_in_module` it into `module`;
189/// 3. stamp `alwaysinline` on every imported host fn that arrived with
190/// a body, so the subsequent O3 pass folds the direct
191/// `call @<host_symbol>` sites into their callers (rustc's default
192/// attribute set otherwise makes the cost-model decline even a
193/// trivial single-use call).
194///
195/// The caller runs the O3 / LTO pipeline afterwards. A host fn the
196/// shim never defined stays an unresolved declaration; that surfaces
197/// downstream (JIT symbol lookup / linker) rather than here.
198pub(crate) fn link_and_inline_host_shim(
199 module: &LlvmModule<'_>,
200 host_shim_src: &str,
201 imports: &[relon_ir::ir::NativeImport],
202) -> Result<(), LlvmError> {
203 link_and_inline_host_shim_for_target(module, host_shim_src, imports, HostShimTarget::Native)
204}
205
/// Which target the host shim is compiled for. The native path passes
/// no `--target` flag, so rustc emits textual IR for its default target;
/// the wasm path emits a `wasm32-unknown-unknown` textual IR with the
/// `p:32:32` DataLayout so the linked-in host body matches the relon
/// wasm32 module's pointer width.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum HostShimTarget {
    /// Compile the shim for rustc's default target (no `--target` flag).
    Native,
    /// Compile the shim with `--target wasm32-unknown-unknown`.
    Wasm32,
}
216
217/// Wasm32 sibling of [`link_and_inline_host_shim`]: compile the host
218/// shim with `rustc --target wasm32-unknown-unknown --emit=llvm-ir`, parse
219/// the textual IR in-process (LLVM-18), and `link_in_module` it into a
220/// relon **wasm32** module so a pure-compute host fn the `imports` table
221/// names gets force-inlined into the wasm unit instead of routed across a
222/// WASI import boundary.
223///
224/// ## wasm32 spike result (validated)
225///
226/// rustc-wasm32 textual IR carries `target triple = "wasm32-unknown-unknown"`
227/// and `target datalayout = "e-m:e-p:32:32-…-ni:1:10:20"`. The relon wasm
228/// module pins `wasm32-wasi`. `link_in_module` tolerates the triple
229/// mismatch (LLVM treats a triple disagreement as a warning, not an
230/// error) and the DataLayouts are compatible (both little-endian,
231/// `p:32:32`), so the inkwell LLVM-18 parser accepts the rustc-22 `.ll`
232/// and the post-O3 wasm32 pipeline inlines the host body — exactly the
233/// native bridge, retargeted. The one residual skew (shared with native):
234/// a host fn whose return value LLVM can range-narrow emits a
235/// `range(iN …)` return attribute the LLVM-18 parser rejects; that
236/// surfaces as a `parse host textual IR` error rather than a silent
237/// miscompile.
238/// wasm closed-world host-shim co-compile that only inlines the
239/// **pure-compute** host fns. `effectful[i] == true` marks import index
240/// `i` as effectful (capability-gated) — it stays a `wasm import` and is
241/// *not* force-inlined even if the shim happens to carry a body for it.
242///
243/// The contract for the caller (`emit_object_for_target`): the wasm
244/// closed-world `host_shim_src` should define **only** the pure host fns;
245/// an effectful fn's implementation lives in the trusted host outside the
246/// sandbox (supplied by wasmtime's `Linker` at instantiation). This entry
247/// just makes the inline set explicit so a pure-only shim is the norm.
248pub(crate) fn link_and_inline_host_shim_wasm_pure_only(
249 module: &LlvmModule<'_>,
250 host_shim_src: &str,
251 imports: &[relon_ir::ir::NativeImport],
252 effectful: &[bool],
253) -> Result<(), LlvmError> {
254 let ctx = module.get_context();
255 let host_ll = compile_host_shim_to_textual_ir(host_shim_src, HostShimTarget::Wasm32)?;
256 let buffer = MemoryBuffer::create_from_file(&host_ll)
257 .map_err(|e| LlvmError::Codegen(format!("cocompile: read host wasm .ll: {e}")))?;
258 let host_module = ctx
259 .create_module_from_ir(buffer)
260 .map_err(|e| LlvmError::Codegen(format!("cocompile: parse host wasm textual IR: {e}")))?;
261 module
262 .link_in_module(host_module)
263 .map_err(|e| LlvmError::Codegen(format!("cocompile: wasm link_in_module: {e}")))?;
264
265 let always_inline = ctx.create_enum_attribute(
266 inkwell::attributes::Attribute::get_named_enum_kind_id("alwaysinline"),
267 0,
268 );
269 for (idx, import) in imports.iter().enumerate() {
270 if effectful.get(idx).copied().unwrap_or(false) {
271 // Effectful: keep the wasm import boundary; never inline.
272 continue;
273 }
274 if let Some(host_fn) = module.get_function(&import.name) {
275 if host_fn.get_first_basic_block().is_some() {
276 host_fn.add_attribute(AttributeLoc::Function, always_inline);
277 }
278 }
279 }
280 Ok(())
281}
282
283fn link_and_inline_host_shim_for_target(
284 module: &LlvmModule<'_>,
285 host_shim_src: &str,
286 imports: &[relon_ir::ir::NativeImport],
287 target: HostShimTarget,
288) -> Result<(), LlvmError> {
289 let ctx = module.get_context();
290 let host_ll = compile_host_shim_to_textual_ir(host_shim_src, target)?;
291 // In-process LLVM-18 parse of rustc's textual IR: no external
292 // `llvm-as-18` binary, no rustc-bitcode summary-version skew. LLVM's
293 // textual IR is forward-compatible enough across the rustc/system
294 // LLVM major gap that the 18.1.3 parser accepts rustc's `.ll`.
295 let buffer = MemoryBuffer::create_from_file(&host_ll)
296 .map_err(|e| LlvmError::Codegen(format!("cocompile: read host .ll: {e}")))?;
297 let host_module = ctx
298 .create_module_from_ir(buffer)
299 .map_err(|e| LlvmError::Codegen(format!("cocompile: parse host textual IR: {e}")))?;
300 module
301 .link_in_module(host_module)
302 .map_err(|e| LlvmError::Codegen(format!("cocompile: link_in_module: {e}")))?;
303
304 let always_inline = ctx.create_enum_attribute(
305 inkwell::attributes::Attribute::get_named_enum_kind_id("alwaysinline"),
306 0,
307 );
308 for import in imports {
309 if let Some(host_fn) = module.get_function(&import.name) {
310 if host_fn.get_first_basic_block().is_some() {
311 host_fn.add_attribute(AttributeLoc::Function, always_inline);
312 }
313 }
314 }
315 Ok(())
316}
317
318/// Compile a host shim Rust source to textual LLVM IR.
319///
320/// The skew bridge (see module docs): emit textual IR with rustc and
321/// hand it straight to inkwell's in-process LLVM-18 parser
322/// (`Context::create_module_from_ir`). Textual IR is forward-compatible
323/// enough across the rustc/system-LLVM major gap that the 18.1.3 parser
324/// accepts it — no external assembler, no bitcode summary-version skew.
325/// The returned path is a `.ll` the caller reads via `MemoryBuffer`.
326fn compile_host_shim_to_textual_ir(
327 host_shim_src: &str,
328 target: HostShimTarget,
329) -> Result<std::path::PathBuf, LlvmError> {
330 // Per-invocation unique dir: PID alone collides when two
331 // co-compiles run on the same process (concurrent test threads, or
332 // a JIT + object emit in one build), racing on `host_shim.ll`.
333 static SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
334 let seq = SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
335 let dir = std::env::temp_dir().join(format!("relon_cocompile_{}_{seq}", std::process::id()));
336 std::fs::create_dir_all(&dir)
337 .map_err(|e| LlvmError::Codegen(format!("cocompile: mkdir tmp: {e}")))?;
338 let rs_path = dir.join("host_shim.rs");
339 let ll_path = dir.join("host_shim.ll");
340 std::fs::write(&rs_path, host_shim_src)
341 .map_err(|e| LlvmError::Codegen(format!("cocompile: write shim: {e}")))?;
342
343 // 1. rustc --emit=llvm-ir (textual): decouples from rustc's bitcode
344 // binary format / ThinLTO summary version.
345 let mut args: Vec<&str> = vec![
346 "--emit=llvm-ir",
347 "--crate-type=cdylib",
348 "-O",
349 // Single codegen unit so `--emit=llvm-ir` writes one
350 // `host_shim.ll` rather than per-CGU `*.rcgu.0.ll` shards
351 // it then fails to merge under `-o`.
352 "-Ccodegen-units=1",
353 ];
354 // wasm32 retarget: the host body must come out with the wasm32
355 // `p:32:32` DataLayout / triple so it links into the relon wasm32
356 // module (see `link_and_inline_host_shim_wasm` docs).
357 if matches!(target, HostShimTarget::Wasm32) {
358 args.push("--target");
359 args.push("wasm32-unknown-unknown");
360 }
361 args.push(rs_path.to_str().unwrap());
362 args.push("-o");
363 args.push(ll_path.to_str().unwrap());
364 let rustc = Command::new("rustc")
365 .args(&args)
366 .output()
367 .map_err(|e| LlvmError::Codegen(format!("cocompile: spawn rustc: {e}")))?;
368 if !rustc.status.success() {
369 return Err(LlvmError::Codegen(format!(
370 "cocompile: rustc --emit=llvm-ir failed: {}",
371 String::from_utf8_lossy(&rustc.stderr)
372 )));
373 }
374
375 // The textual `.ll` is consumed in-process by inkwell's LLVM-18
376 // parser; no external `llvm-as-18` assembly step.
377 Ok(ll_path)
378}
379
380/// Run the same `default<O3>` middle-end pipeline the MCJIT path uses
381/// (`evaluator.rs::run_default_o3_pipeline`). Re-implemented here
382/// because that one is private to `evaluator.rs`; the knobs are
383/// identical so the optimized shape matches.
384fn run_default_o3_pipeline(module: &LlvmModule<'_>) -> Result<(), LlvmError> {
385 Target::initialize_native(&InitializationConfig::default())
386 .map_err(|e| LlvmError::Codegen(format!("cocompile: initialize_native: {e}")))?;
387 let triple_str = TargetMachine::get_default_triple();
388 let target = Target::from_triple(&triple_str)
389 .map_err(|e| LlvmError::Codegen(format!("cocompile: target from_triple: {e}")))?;
390 let cpu = TargetMachine::get_host_cpu_name();
391 let features = TargetMachine::get_host_cpu_features();
392 let triple = TargetTriple::create(
393 triple_str
394 .as_str()
395 .to_str()
396 .map_err(|e| LlvmError::Codegen(format!("cocompile: triple utf8: {e}")))?,
397 );
398 let machine = target
399 .create_target_machine(
400 &triple,
401 cpu.to_str().unwrap_or(""),
402 features.to_str().unwrap_or(""),
403 OptimizationLevel::Aggressive,
404 RelocMode::Default,
405 CodeModel::JITDefault,
406 )
407 .ok_or_else(|| LlvmError::Codegen("cocompile: create_target_machine null".into()))?;
408 let opts = inkwell::passes::PassBuilderOptions::create();
409 module
410 .run_passes("default<O3>", &machine, opts)
411 .map_err(|e| LlvmError::Codegen(format!("cocompile: run_passes O3: {e}")))?;
412 Ok(())
413}