rlx-oneapi 0.2.10

Intel oneAPI Level Zero backend for RLX (Arc / Data Center Max via libze_loader + SPIR-V compute kernels)
Documentation
//! Compile the OpenCL-C compute kernels under `kernels/*.cl` to (Kernel-flavor)
//! SPIR-V for the Level Zero module loader, and embed the resulting `.spv`
//! blobs via a generated `kernels_generated.rs`.
//!
//! ## Why OpenCL-C and not GLSL/naga
//!
//! Level Zero's `zeModuleCreate(ZE_MODULE_FORMAT_IL_SPIRV)` +
//! `zeKernelSetArgumentValue` consume **OpenCL/Kernel-flavor** SPIR-V — entry
//! points declared `OpEntryPoint Kernel`, arguments passed as kernel function
//! parameters, the Physical64 addressing model. That is a *different* SPIR-V
//! dialect from the Vulkan **Shader/GLCompute** flavor naga emits from GLSL
//! (push-constant blocks + descriptor-bound buffers). They are not
//! interchangeable, so the native Intel path is authored in OpenCL-C and lowered
//! by Intel's offline compiler (`ocloc`) — the same SPIR-V Intel's GPU compiler
//! ingests at clBuildProgram / SYCL time.
//!
//! ## Cross-platform builds
//!
//! `ocloc` ships only with the Intel oneAPI / Compute-Runtime toolchain, which
//! is absent on this macOS dev box and on CI. So kernel compilation is **opt-in
//! and best-effort**: it is attempted only when `RLX_ONEAPI_BUILD_KERNELS=1`,
//! and a kernel is embedded only if `ocloc` (looked up on `PATH`, or the
//! executable named by `RLX_ONEAPI_OCLOC`) compiles it successfully; a kernel
//! that fails to compile is skipped with a warning. `RLX_ONEAPI_OCLOC_DEVICE`
//! overrides the `-device` token (default `pvc`). Otherwise (the default
//! everywhere off an Intel build host) no blobs are embedded — the crate still
//! compiles, and the backend serves every op through the `rlx-cpu` reference
//! path. This mirrors how rlx-cuda ships `.cu` sources validated only in its
//! Linux Docker image.

use std::fmt::Write as _;
use std::path::Path;
use std::process::Command;
use std::{env, fs};

/// Build-script entry point: optionally compiles `kernels/*.cl` to SPIR-V with
/// `ocloc`, then always writes `$OUT_DIR/kernels_generated.rs`.
///
/// Environment inputs (each registered with `cargo:rerun-if-env-changed`):
/// - `RLX_ONEAPI_BUILD_KERNELS`: compilation is attempted only when this is
///   exactly `1`.
/// - `RLX_ONEAPI_OCLOC`: compiler executable to spawn (default `ocloc`).
/// - `RLX_ONEAPI_OCLOC_DEVICE`: value passed to `-device` (default `pvc`).
///
/// A kernel that fails to compile is skipped with a `cargo:warning`; it does
/// not fail the build.
///
/// # Panics
/// Panics if `CARGO_MANIFEST_DIR` or `OUT_DIR` is unset, if the `kernels`
/// directory cannot be read while compilation is requested, or if the generated
/// source cannot be written.
fn main() {
    let manifest_dir =
        env::var("CARGO_MANIFEST_DIR").expect("cargo sets CARGO_MANIFEST_DIR for build scripts");
    let kernel_dir = Path::new(&manifest_dir).join("kernels");
    let out_dir = env::var("OUT_DIR").expect("cargo sets OUT_DIR for build scripts");

    println!("cargo:rerun-if-changed=kernels");
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rerun-if-env-changed=RLX_ONEAPI_BUILD_KERNELS");
    // Every variable read below must be listed, otherwise pointing the build at
    // a different compiler would silently reuse the previous run's output.
    println!("cargo:rerun-if-env-changed=RLX_ONEAPI_OCLOC");
    println!("cargo:rerun-if-env-changed=RLX_ONEAPI_OCLOC_DEVICE");

    let want_build = env::var("RLX_ONEAPI_BUILD_KERNELS").as_deref() == Ok("1");
    let ocloc = env::var("RLX_ONEAPI_OCLOC").unwrap_or_else(|_| "ocloc".to_string());
    // Intel device token ocloc compiles for; any modern Arc/PVC token works
    // since we stop at SPIR-V (`-spv_only`). Default to a common one.
    let device_token = env::var("RLX_ONEAPI_OCLOC_DEVICE").unwrap_or_else(|_| "pvc".to_string());

    // Names (file stems) of the kernels whose SPIR-V was produced this run.
    let mut entries: Vec<String> = Vec::new();

    if want_build {
        let mut files: Vec<_> = fs::read_dir(&kernel_dir)
            .unwrap_or_else(|e| panic!("rlx-oneapi: cannot read {}: {e}", kernel_dir.display()))
            .filter_map(Result::ok)
            .map(|e| e.path())
            .filter(|p| p.extension().map(|x| x == "cl").unwrap_or(false))
            .collect();
        // Sorted so the generated registry is stable across runs.
        files.sort();

        for path in &files {
            println!("cargo:rerun-if-changed={}", path.display());
            // A path that passed the `.cl` extension filter always has a stem.
            let name = path.file_stem().unwrap().to_string_lossy().to_string();
            let spv_path = Path::new(&out_dir).join(format!("{name}.spv"));
            match compile_cl_to_spirv(&ocloc, &device_token, path, &spv_path) {
                Ok(()) => entries.push(name),
                Err(e) => println!(
                    "cargo:warning=rlx-oneapi: skipped kernel {name}.cl ({e}); \
                     it will run via the CPU reference path"
                ),
            }
        }
    } else {
        println!(
            "cargo:warning=rlx-oneapi: native SPIR-V kernels not built \
             (set RLX_ONEAPI_BUILD_KERNELS=1 on an Intel oneAPI host with `ocloc`); \
             compute runs via the CPU reference path"
        );
    }

    let generated = Path::new(&out_dir).join("kernels_generated.rs");
    fs::write(&generated, render_registry(&entries))
        .unwrap_or_else(|e| panic!("rlx-oneapi: cannot write {}: {e}", generated.display()));
}

/// Render the text of `kernels_generated.rs` for the kernel names in `entries`.
///
/// Blobs are referenced through `env!("OUT_DIR")` rather than an absolute path
/// baked in here, so the embed survives a relocated target dir (mirrors
/// rlx-vulkan).
fn render_registry(entries: &[String]) -> String {
    let mut src = String::new();
    src.push_str("// @generated by build.rs — OpenCL-C → SPIR-V Level Zero kernels.\n");
    src.push_str("/// (kernel name, Kernel-flavor SPIR-V byte blob) for every `kernels/*.cl`\n");
    src.push_str("/// compiled by `ocloc` for this build. Empty when no Intel toolchain.\n");
    src.push_str("pub static SPIRV_BLOBS: &[(&str, &[u8])] = &[\n");
    for name in entries {
        // Debug-format both literals so a stem containing `"` or `\` still
        // yields a valid Rust string literal in the generated file.
        let blob_suffix = format!("/{name}.spv");
        writeln!(
            src,
            "    ({name:?}, include_bytes!(concat!(env!(\"OUT_DIR\"), {blob_suffix:?}))),"
        )
        .expect("writing to a String cannot fail");
    }
    src.push_str("];\n");
    src.push_str("/// Whether any native SPIR-V kernel was embedded this build.\n");
    writeln!(
        src,
        "pub const KERNELS_BUILT: bool = {};",
        !entries.is_empty()
    )
    .expect("writing to a String cannot fail");
    src
}

/// Compile one OpenCL-C file to SPIR-V with `ocloc compile -spv_only`, leaving
/// the result at exactly `spv_path`.
///
/// * `ocloc` — executable to spawn.
/// * `device` — value passed to `-device`.
/// * `cl_path` — kernel source passed to `-file`.
/// * `spv_path` — where the SPIR-V must end up. The base handed to `-output` is
///   this path minus its final extension; the compiler is expected to write
///   `<base>.spv`, which is renamed to `spv_path` when the two differ.
///
/// Any file already present at `spv_path` (or at `<base>.spv`) is deleted
/// before the compiler runs, so `Ok(())` always means this invocation produced
/// the blob.
///
/// # Errors
/// Returns a human-readable message when `spv_path` has no file name, a stale
/// output cannot be removed, the compiler cannot be spawned, it exits
/// unsuccessfully, the rename fails, or no SPIR-V file exists afterwards.
fn compile_cl_to_spirv(
    ocloc: &str,
    device: &str,
    cl_path: &Path,
    spv_path: &Path,
) -> Result<(), String> {
    let stem = spv_path
        .file_stem()
        .map(|s| s.to_string_lossy().into_owned())
        .ok_or_else(|| format!("output path {} has no file name", spv_path.display()))?;
    let out_base = spv_path.with_extension("");

    // ocloc writes `<out_base>.spv`. Build that name by appending: calling
    // `with_extension("spv")` on a dotted base such as `foo.bar` would replace
    // `.bar` and point at `foo.spv`, i.e. at a different kernel's blob.
    let mut produced = out_base.clone().into_os_string();
    produced.push(".spv");
    let produced = std::path::PathBuf::from(produced);

    // Remove leftovers from earlier builds so a compiler run that exits 0
    // without writing anything cannot pass off an old blob as fresh output.
    for stale in [produced.as_path(), spv_path] {
        match fs::remove_file(stale) {
            Ok(()) => {}
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(format!("remove stale {}: {e}", stale.display())),
        }
    }

    let status = Command::new(ocloc)
        .arg("compile")
        .arg("-file")
        .arg(cl_path)
        .arg("-device")
        .arg(device)
        .arg("-spv_only")
        .arg("-output_no_suffix")
        .arg("-output")
        .arg(&out_base)
        .status()
        .map_err(|e| format!("spawn {ocloc}: {e}"))?;
    if !status.success() {
        return Err(format!("{ocloc} exited with {status}"));
    }

    // Normalize to the exact path we embed (only differs when `spv_path` does
    // not itself end in `.spv`).
    if produced != *spv_path && produced.exists() {
        fs::rename(&produced, spv_path).map_err(|e| format!("rename {stem}.spv: {e}"))?;
    }
    if !spv_path.exists() {
        return Err(format!("ocloc produced no SPIR-V for {stem}"));
    }
    Ok(())
}