relon-codegen-llvm 0.1.0-rc2

LLVM-backed AOT evaluator for Relon (Phase A bootstrap)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
//! Stage 1.B — LTO co-compile backbone (closed-world `CallNative`).
//!
//! GraalVM-style closed-world native dispatch: when the full host-fn
//! set is known at emit time (the `build.rs` / `emit_object` path,
//! *not* the open-world MCJIT / `from_source` path), the host Rust is
//! compiled to LLVM bitcode, linked into the *same* LLVM module as the
//! emitted Relon code, and run through LTO / inline so every
//! `Op::CallNative` collapses from a dynamic
//! `relon_llvm_call_native` helper hop into an inlined unit-internal
//! call — exactly what `relon-codegen-cranelift`'s *static*
//! `cap_lookup -> fn_ptr` arm does, but resolved fully at link time.
//!
//! ## Toolchain spike (the highest risk, validated first)
//!
//! The host bitcode is produced by **rustc's bundled LLVM**, while the
//! Relon module is built by the **system LLVM 18.1.3** (`inkwell`'s
//! `llvm18-1` feature). On this host rustc ships LLVM 22 — a 4-major
//! skew. Raw `rustc --emit=llvm-bc` embeds a ThinLTO module-summary
//! whose version (12) the LLVM-18 bitcode reader rejects
//! (`Invalid summary version 12`), so `link_in_module` cannot consume
//! it directly.
//!
//! The bridge that works: emit **textual** IR (`rustc --emit=llvm-ir`)
//! and parse it **in-process** with inkwell's LLVM-18 parser
//! (`Context::create_module_from_ir`). LLVM's textual IR is
//! forward-compatible enough across this skew that the 18.1.3 parser
//! accepts rustc-22's `.ll`, yielding an LLVM-18 module the inkwell
//! module links cleanly — no external `llvm-as-18` binary required.
//! The host fn is then marked
//! `alwaysinline` so the O3 pipeline fully inlines it (the rustc
//! default attribute set — `probe-stack` / `target-cpu` — otherwise
//! makes the cost-model decline even a trivial single-use call).
//!
//! Everything here is gated behind explicit calls; the open-world
//! MCJIT path (`evaluator.rs`) is untouched and remains the default.

use std::process::Command;

use inkwell::attributes::AttributeLoc;
use inkwell::context::Context;
use inkwell::execution_engine::ExecutionEngine;
use inkwell::memory_buffer::MemoryBuffer;
use inkwell::module::Module as LlvmModule;
use inkwell::targets::{
    CodeModel, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple,
};
use inkwell::OptimizationLevel;

use crate::codegen::{emit_module_funcs_closed_world, ConstPool, ENTRY_SYMBOL};
use crate::error::LlvmError;

/// Raw `unsafe extern "C" fn(i64) -> i64` signature of a closed-world
/// legacy-i64 entry that JIT-runs without the buffer arena handshake.
///
/// The alias is strictly single-argument: it is the type
/// `CocompiledModule::run_i64` uses to look up and call the entry, so
/// the arity is fixed by that call site rather than being variadic.
type EntryArity1 = unsafe extern "C" fn(i64) -> i64;

/// Result of a closed-world co-compile: the module IR text captured
/// before linking and after optimization (for inline-count assertions)
/// plus the JIT execution engine that callers use to run the entry.
pub struct CocompiledModule {
    /// The post-O3 module IR text. Callers assert against this:
    /// zero `call @relon_llvm_call_native` (open-world helper never
    /// emitted) and zero residual `call @<host_symbol>` (the linked
    /// host fn was inlined).
    pub ir_after_opt: String,
    /// The pre-link / pre-opt module IR text — useful when a test
    /// wants to confirm the direct `call @<host_symbol>` was the shape
    /// emitted before inlining erased it.
    pub ir_before_link: String,
    // The engine is created from the module, which is tied to the
    // leaked `Context`. Rust drops struct fields in declaration order,
    // so this last field is dropped *after* the two IR strings. Drop
    // order relative to the `Context` is moot: the `Context` is leaked
    // in `cocompile_legacy_i64` and never freed, which is what makes
    // the `'static` lifetime here sound.
    engine: ExecutionEngine<'static>,
}

impl CocompiledModule {
    /// Invoke the JIT'd closed-world legacy-i64 entry with one `i64`
    /// argument and return its `i64` result.
    ///
    /// # Errors
    /// Returns `LlvmError::Codegen` when the engine cannot resolve
    /// `ENTRY_SYMBOL`.
    ///
    /// # Caller precondition
    /// The entry is called through a raw `extern "C" fn(i64) -> i64`
    /// pointer and its real signature is not checked here. The module
    /// this value was built from must therefore carry a single-arg
    /// legacy entry (the spike fixture does).
    pub fn run_i64(&self, arg: i64) -> Result<i64, LlvmError> {
        // SAFETY: relies on the caller precondition above — the symbol
        // named `ENTRY_SYMBOL` must really have the `EntryArity1`
        // signature; the engine owns the code and outlives this call.
        let lookup = unsafe { self.engine.get_function::<EntryArity1>(ENTRY_SYMBOL) };
        let entry: inkwell::execution_engine::JitFunction<'_, EntryArity1> = match lookup {
            Ok(found) => found,
            Err(e) => {
                return Err(LlvmError::Codegen(format!(
                    "cocompile: entry lookup: {e}"
                )))
            }
        };
        // SAFETY: same precondition; the argument is a plain `i64`
        // passed by value, so no memory is shared with the JIT'd code.
        let result = unsafe { entry.call(arg) };
        Ok(result)
    }
}

/// Co-compile a closed-world legacy-i64 IR module against a host shim
/// crate.
///
/// 1. emit the Relon module with `WorldMode::ClosedWorld` so
///    `Op::CallNative` lowers to a direct `call @<host_symbol>`;
/// 2. compile `host_shim_src` (a `#[no_mangle] extern "C"` host fn
///    crate) to textual IR, parsed in-process as an LLVM-18 module;
/// 3. `link_in_module` the host module into the Relon module;
/// 4. mark every linked host fn `alwaysinline`;
/// 5. run the same `default<O3>` pipeline the MCJIT path uses, then
///    JIT the module.
///
/// `ir` must have a legacy-i64 `(i64) -> i64` entry whose body carries
/// the `Op::CallNative` and an `imports` table naming the host fn.
///
/// # Errors
/// Returns `LlvmError::Codegen` when the IR module has no entry
/// function, when the recorded entry index does not name a function in
/// `ir.funcs`, when a helper index does not fit in `u32`, or when any
/// of the emit / shim-compile / link / optimize / JIT steps fails.
///
/// # Leaks
/// Each call that gets past the entry validation leaks one LLVM
/// `Context` on purpose, so the returned engine can hold a `'static`
/// borrow of it.
pub fn cocompile_legacy_i64(
    ir: &relon_ir::ir::Module,
    host_shim_src: &str,
) -> Result<CocompiledModule, LlvmError> {
    let entry_idx = ir
        .entry_func_index
        .ok_or_else(|| LlvmError::Codegen("cocompile: IR module has no entry function".into()))?;
    // Checked lookup: a stale or corrupt entry index is reported as an
    // error instead of panicking on an out-of-bounds index.
    let entry = ir.funcs.get(entry_idx).ok_or_else(|| {
        LlvmError::Codegen(format!(
            "cocompile: entry function index {entry_idx} out of range ({} funcs)",
            ir.funcs.len()
        ))
    })?;

    // Leak the Context so the engine can hold a `'static` borrow (same
    // pattern as `LlvmAotEvaluator`). `Box::leak` is the safe spelling
    // of "never free this heap allocation".
    let ctx: &'static Context = Box::leak(Box::new(Context::create()));

    let module = ctx.create_module("relon_llvm_cocompile");

    let const_pool = ConstPool::from_module(ir)?;

    // Every non-entry function is a helper. Collect the helpers and
    // their original IR indices in one pass so the two lists cannot
    // drift out of step with each other.
    let helper_capacity = ir.funcs.len().saturating_sub(1);
    let mut helpers: Vec<&relon_ir::ir::Func> = Vec::with_capacity(helper_capacity);
    let mut helper_ir_indices: Vec<u32> = Vec::with_capacity(helper_capacity);
    for (i, func) in ir.funcs.iter().enumerate() {
        if i == entry_idx {
            continue;
        }
        // Refuse to truncate silently if the index does not fit `u32`.
        let ir_index = u32::try_from(i).map_err(|_| {
            LlvmError::Codegen(format!("cocompile: helper index {i} does not fit in u32"))
        })?;
        helpers.push(func);
        helper_ir_indices.push(ir_index);
    }

    // Emit with the closed-world flag: `Op::CallNative` -> direct
    // `call @<host_symbol>`, host fns pre-declared as `extern`.
    emit_module_funcs_closed_world(
        ctx,
        &module,
        entry,
        /*buffer_return_size=*/ 0,
        &const_pool,
        &helpers,
        Some(&helper_ir_indices),
        /*lambdas=*/ &[],
        /*closure_table=*/ &[],
        &ir.imports,
    )?;

    // Snapshot before linking so tests can see the direct call shape.
    let ir_before_link = module.print_to_string().to_string();

    // Compile + link the host module for every imported host fn, then
    // force-inline. Shared with the source-driven `emit_object` buffer
    // path (`evaluator.rs`).
    link_and_inline_host_shim(&module, host_shim_src, &ir.imports)?;

    run_default_o3_pipeline(&module)?;

    // Snapshot after O3 so tests can assert the host call was inlined.
    let ir_after_opt = module.print_to_string().to_string();

    let engine = module
        .create_jit_execution_engine(OptimizationLevel::Aggressive)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: create JIT engine: {e}")))?;

    Ok(CocompiledModule {
        ir_after_opt,
        ir_before_link,
        engine,
    })
}

/// Native-target entry point for the host-shim link step: compile
/// `host_shim_src`, link it into `module`, and force-inline the host
/// fns named by `imports`.
///
/// This is a thin wrapper that delegates to
/// `link_and_inline_host_shim_for_target` with
/// [`HostShimTarget::Native`]. That callee:
///
/// 1. compiles `host_shim_src` to textual LLVM IR and parses it
///    in-process with inkwell (LLVM-18) — the skew bridge described in
///    the module docs;
/// 2. `link_in_module`s the parsed host module into `module`;
/// 3. stamps `alwaysinline` on each imported host fn that arrived with
///    a body, so a later O3 run folds the direct `call @<host_symbol>`
///    sites into their callers.
///
/// Called by [`cocompile_legacy_i64`]; per the original notes it is
/// also shared with `LlvmAotEvaluator::emit_object_with_options` (the
/// source-driven buffer-protocol object path).
///
/// The O3 / LTO pipeline is *not* run here — the caller does that
/// afterwards. A host fn the shim never defined stays an unresolved
/// declaration; that surfaces downstream (JIT symbol lookup / linker)
/// rather than as an error from this function.
pub(crate) fn link_and_inline_host_shim(
    module: &LlvmModule<'_>,
    host_shim_src: &str,
    imports: &[relon_ir::ir::NativeImport],
) -> Result<(), LlvmError> {
    let target = HostShimTarget::Native;
    link_and_inline_host_shim_for_target(module, host_shim_src, imports, target)
}

/// Which target the host shim is compiled for.
///
/// The choice only affects how `compile_host_shim_to_textual_ir`
/// invokes rustc: the native path compiles for rustc's default (host)
/// target, while the wasm path requests `wasm32-unknown-unknown` so the
/// linked-in host body comes out with the wasm32 `p:32:32` DataLayout
/// and matches the relon wasm32 module's pointer width.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum HostShimTarget {
    /// Host build: rustc is invoked without a `--target` flag.
    Native,
    /// Wasm build: rustc is invoked with
    /// `--target wasm32-unknown-unknown`.
    Wasm32,
}

/// Wasm32 sibling of [`link_and_inline_host_shim`] that force-inlines
/// only the **pure-compute** host fns.
///
/// The shim is compiled with
/// `rustc --target wasm32-unknown-unknown --emit=llvm-ir`, the textual
/// IR is parsed in-process (LLVM-18) and `link_in_module`d into the
/// relon **wasm32** `module`. Every import that is not flagged
/// effectful and that arrived with a body is then stamped
/// `alwaysinline`, so it is folded into the wasm unit instead of being
/// routed across a WASI import boundary.
///
/// ## Effectful imports
///
/// `effectful[i] == true` marks import index `i` as effectful
/// (capability-gated); such an import is never stamped `alwaysinline`
/// here. An index past the end of `effectful` is treated as pure.
///
/// The contract for the caller (`emit_object_for_target`): the wasm
/// closed-world `host_shim_src` should define **only** the pure host
/// fns. An effectful fn's implementation lives in the trusted host
/// outside the sandbox (supplied by wasmtime's `Linker` at
/// instantiation).
///
/// NOTE(review): if the shim nevertheless defines a body for an
/// effectful import, that body is still linked into `module` by
/// `link_in_module`; this function only withholds `alwaysinline`.
/// Confirm callers really pass a pure-only shim.
///
/// ## wasm32 spike result (validated)
///
/// rustc-wasm32 textual IR carries
/// `target triple = "wasm32-unknown-unknown"` and
/// `target datalayout = "e-m:e-p:32:32-…-ni:1:10:20"`, while the relon
/// wasm module pins `wasm32-wasi`. `link_in_module` tolerates the
/// triple mismatch (LLVM treats a triple disagreement as a warning,
/// not an error) and the DataLayouts are compatible (both
/// little-endian, `p:32:32`), so the native bridge works retargeted.
///
/// One residual skew is shared with the native path: a host fn whose
/// return value LLVM can range-narrow emits a `range(iN …)` return
/// attribute the LLVM-18 parser rejects. That surfaces as a
/// `parse host wasm textual IR` error rather than a silent miscompile.
pub(crate) fn link_and_inline_host_shim_wasm_pure_only(
    module: &LlvmModule<'_>,
    host_shim_src: &str,
    imports: &[relon_ir::ir::NativeImport],
    effectful: &[bool],
) -> Result<(), LlvmError> {
    // Shim source -> wasm32 `.ll` on disk -> in-process LLVM module.
    let ll_path = compile_host_shim_to_textual_ir(host_shim_src, HostShimTarget::Wasm32)?;
    let llvm_ctx = module.get_context();
    let ll_buffer = MemoryBuffer::create_from_file(&ll_path)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: read host wasm .ll: {e}")))?;
    let shim_module = llvm_ctx
        .create_module_from_ir(ll_buffer)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: parse host wasm textual IR: {e}")))?;
    module
        .link_in_module(shim_module)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: wasm link_in_module: {e}")))?;

    let inline_kind = inkwell::attributes::Attribute::get_named_enum_kind_id("alwaysinline");
    let force_inline = llvm_ctx.create_enum_attribute(inline_kind, 0);

    imports
        .iter()
        .enumerate()
        // Effectful: keep the wasm import boundary; never inline.
        .filter(|(idx, _)| !effectful.get(*idx).copied().unwrap_or(false))
        .filter_map(|(_, import)| module.get_function(&import.name))
        // Only fns that arrived with a body can be inlined.
        .filter(|host_fn| host_fn.get_first_basic_block().is_some())
        .for_each(|host_fn| host_fn.add_attribute(AttributeLoc::Function, force_inline));

    Ok(())
}

/// Compile the host shim for `target`, link it into `module`, and
/// stamp `alwaysinline` on every host fn in `imports` that arrived
/// with a body.
///
/// Imports with no matching function in the linked module, or with a
/// declaration only, are skipped without error.
///
/// # Errors
/// Returns `LlvmError::Codegen` when the shim fails to compile, the
/// `.ll` cannot be read or parsed, or `link_in_module` fails.
fn link_and_inline_host_shim_for_target(
    module: &LlvmModule<'_>,
    host_shim_src: &str,
    imports: &[relon_ir::ir::NativeImport],
    target: HostShimTarget,
) -> Result<(), LlvmError> {
    let ll_path = compile_host_shim_to_textual_ir(host_shim_src, target)?;
    let llvm_ctx = module.get_context();

    // rustc's textual IR is parsed in-process by the LLVM-18 reader:
    // no external `llvm-as-18` binary and no rustc-bitcode
    // summary-version skew. Textual IR is forward-compatible enough
    // across the rustc/system LLVM major gap for the 18.1.3 parser to
    // accept rustc's `.ll`.
    let shim_module = MemoryBuffer::create_from_file(&ll_path)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: read host .ll: {e}")))
        .and_then(|ll_buffer| {
            llvm_ctx
                .create_module_from_ir(ll_buffer)
                .map_err(|e| LlvmError::Codegen(format!("cocompile: parse host textual IR: {e}")))
        })?;
    module
        .link_in_module(shim_module)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: link_in_module: {e}")))?;

    let inline_kind = inkwell::attributes::Attribute::get_named_enum_kind_id("alwaysinline");
    let force_inline = llvm_ctx.create_enum_attribute(inline_kind, 0);

    imports
        .iter()
        .filter_map(|import| module.get_function(&import.name))
        // A declaration without a body has nothing to inline.
        .filter(|host_fn| host_fn.get_first_basic_block().is_some())
        .for_each(|host_fn| host_fn.add_attribute(AttributeLoc::Function, force_inline));

    Ok(())
}

/// Compile a host shim Rust source to textual LLVM IR.
///
/// The skew bridge (see module docs): emit textual IR with rustc and
/// hand it straight to inkwell's in-process LLVM-18 parser
/// (`Context::create_module_from_ir`). Textual IR is forward-compatible
/// enough across the rustc/system-LLVM major gap that the 18.1.3 parser
/// accepts it — no external assembler, no bitcode summary-version skew.
///
/// `host_shim_src` is written to `host_shim.rs` inside a fresh
/// directory under the system temp dir, and `rustc` (resolved via
/// `PATH`) compiles it to `host_shim.ll` next to it. `target` decides
/// whether `--target wasm32-unknown-unknown` is passed.
///
/// Returns the path of the `.ll`, which the caller reads via
/// `MemoryBuffer`. The temp directory is not removed by this function.
///
/// # Errors
/// Returns `LlvmError::Codegen` when the temp directory or shim file
/// cannot be written, when `rustc` cannot be spawned, or when `rustc`
/// exits unsuccessfully (its stderr is included in the message).
fn compile_host_shim_to_textual_ir(
    host_shim_src: &str,
    target: HostShimTarget,
) -> Result<std::path::PathBuf, LlvmError> {
    // Per-invocation unique dir: PID alone collides when two
    // co-compiles run on the same process (concurrent test threads, or
    // a JIT + object emit in one build), racing on `host_shim.ll`.
    static SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    let seq = SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let dir = std::env::temp_dir().join(format!("relon_cocompile_{}_{seq}", std::process::id()));
    std::fs::create_dir_all(&dir)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: mkdir tmp: {e}")))?;
    let rs_path = dir.join("host_shim.rs");
    let ll_path = dir.join("host_shim.ll");
    std::fs::write(&rs_path, host_shim_src)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: write shim: {e}")))?;

    // 1. rustc --emit=llvm-ir (textual): decouples from rustc's bitcode
    //    binary format / ThinLTO summary version.
    let mut rustc = Command::new("rustc");
    rustc.args([
        "--emit=llvm-ir",
        "--crate-type=cdylib",
        "-O",
        // Single codegen unit so `--emit=llvm-ir` writes one
        // `host_shim.ll` rather than per-CGU `*.rcgu.0.ll` shards
        // it then fails to merge under `-o`.
        "-Ccodegen-units=1",
    ]);
    // wasm32 retarget: the host body must come out with the wasm32
    // `p:32:32` DataLayout / triple so it links into the relon wasm32
    // module (see `link_and_inline_host_shim_wasm_pure_only` docs).
    match target {
        HostShimTarget::Native => {}
        HostShimTarget::Wasm32 => {
            rustc.args(["--target", "wasm32-unknown-unknown"]);
        }
    }
    // Paths go through as `OsStr`, so a temp dir whose path is not
    // valid UTF-8 is handled instead of panicking on `to_str()`.
    rustc.arg(&rs_path).arg("-o").arg(&ll_path);

    let output = rustc
        .output()
        .map_err(|e| LlvmError::Codegen(format!("cocompile: spawn rustc: {e}")))?;
    if !output.status.success() {
        return Err(LlvmError::Codegen(format!(
            "cocompile: rustc --emit=llvm-ir failed: {}",
            String::from_utf8_lossy(&output.stderr)
        )));
    }

    // The textual `.ll` is consumed in-process by inkwell's LLVM-18
    // parser; no external `llvm-as-18` assembly step.
    Ok(ll_path)
}

/// Optimize `module` in place with LLVM's `default<O3>` pass pipeline,
/// targeting the host machine (default triple, host CPU name and
/// feature string).
///
/// Mirrors the middle-end pipeline of the MCJIT path
/// (`evaluator.rs::run_default_o3_pipeline`). It is duplicated here
/// because that function is private to `evaluator.rs`; the knobs are
/// kept identical so the optimized shape matches.
///
/// # Errors
/// Returns `LlvmError::Codegen` when native target initialization,
/// target lookup, triple decoding, target-machine creation, or the
/// pass run itself fails.
fn run_default_o3_pipeline(module: &LlvmModule<'_>) -> Result<(), LlvmError> {
    Target::initialize_native(&InitializationConfig::default())
        .map_err(|e| LlvmError::Codegen(format!("cocompile: initialize_native: {e}")))?;

    let default_triple = TargetMachine::get_default_triple();
    let llvm_target = Target::from_triple(&default_triple)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: target from_triple: {e}")))?;

    let host_cpu = TargetMachine::get_host_cpu_name();
    let host_features = TargetMachine::get_host_cpu_features();

    let triple_text = default_triple
        .as_str()
        .to_str()
        .map_err(|e| LlvmError::Codegen(format!("cocompile: triple utf8: {e}")))?;
    let triple = TargetTriple::create(triple_text);

    let maybe_machine = llvm_target.create_target_machine(
        &triple,
        // A non-UTF-8 CPU / feature string falls back to "".
        host_cpu.to_str().unwrap_or(""),
        host_features.to_str().unwrap_or(""),
        OptimizationLevel::Aggressive,
        RelocMode::Default,
        CodeModel::JITDefault,
    );
    let machine = match maybe_machine {
        Some(machine) => machine,
        None => {
            return Err(LlvmError::Codegen(
                "cocompile: create_target_machine null".into(),
            ))
        }
    };

    let pass_options = inkwell::passes::PassBuilderOptions::create();
    module
        .run_passes("default<O3>", &machine, pass_options)
        .map_err(|e| LlvmError::Codegen(format!("cocompile: run_passes O3: {e}")))
}