// rialo-build-lib 0.3.0
//
// Shared library for Rialo program building logic
// Documentation
// Copyright (c) Subzero Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

//! Cargo build-script integration for Rialo PolkaVM artifact crates.
//!
//! This module exists for crates that need a local Rialo program artifact as
//! part of an otherwise normal Cargo build. On stable Cargo, the practical way
//! to do that is to use `build.rs` to compile a sibling program for the Rialo
//! custom RISC-V target, post-process its ELF into a `.polkavm` blob, and then
//! make that blob available to the current crate.
//!
//! # Problem
//!
//! Simply put, we want to express in the build system that the current host
//! build also depends on a sibling package's target-specific artifact.
//!
//! We want Cargo to:
//!
//! - build the current crate for the host as normal
//! - also build a sibling Cargo package for the Rialo custom RISC-V target
//! - then make the produced artifact available to the current build
//!
//! Stable Cargo *can* build multiple requested targets in one invocation, but
//! that is not enough here. The missing piece is a first-class dependency edge
//! for "this host crate also needs that sibling package's target-specific
//! artifact". If Cargo had that on stable, surfacing the result through a
//! host-built Rust library would be optional rather than part of the
//! workaround.
//!
//! On stable Cargo, that graph is still awkward to express natively:
//!
//! - artifact dependencies / binary dependencies are nightly-only behind
//!   `-Z bindeps`; see Cargo's unstable
//!   [`artifact-dependencies`](https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#artifact-dependencies)
//!   docs and [RFC 3028](https://rust-lang.github.io/rfcs/3028-cargo-binary-dependencies.html)
//! - forcing a different target for one package inside the graph is also
//!   unstable via
//!   [`per-package-target`](https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#per-package-target),
//!   and its tracking issue explicitly calls out rough edges around dependency
//!   interaction; see [cargo#9406](https://github.com/rust-lang/cargo/issues/9406)
//! - build scripts still do not receive a resolved target directory directly
//!   from Cargo; see Cargo's documented build-script environment variables and
//!   [cargo#9661](https://github.com/rust-lang/cargo/issues/9661)
//!
//! Until Cargo has that on stable, a `build.rs` bridge is the pragmatic
//! option. That is also an explicitly recognized workaround in
//! [RFC 3028](https://rust-lang.github.io/rfcs/3028-cargo-binary-dependencies.html),
//! which proposes artifact dependencies as the longer-term replacement for this
//! style of nested-Cargo integration. In that model, `build.rs` would likely
//! shrink to the Rialo-specific ELF -> `.polkavm` postprocessing step.
//!
//! # Scope
//!
//! This module adapts the shared compilation logic in [`crate::compilation`] to
//! Cargo's `build.rs` environment. It owns the Cargo-facing mechanics:
//! locating the sibling program relative to the current crate, emitting Cargo
//! build-script directives, deriving rerun inputs from the local Cargo package
//! graph via `cargo metadata`, and exposing the produced artifact by copying it
//! into `OUT_DIR` and exporting its file name as `RIALO_BUILD_ARTIFACT_FILE`.
//!
//! The actual Rialo program compilation policy stays in
//! [`crate::compilation`]: toolchain resolution, target-dir resolution, builder
//! selection, nested Cargo environment sanitization, and the mechanics of
//! producing the program artifact.
//!
//! This module is for artifact crates that need to build one local Cargo
//! program into one PolkaVM blob using the currently supported Rialo custom
//! RISC-V path.
//!
//! # Change Detection
//!
//! Stable Cargo does not let us declare the sibling program crate as a native
//! artifact dependency, so build-script reruns must be driven by explicit
//! `cargo:rerun-if-*` directives.
//!
//! Today this module uses a broad but correct approximation:
//!
//! - watch `CARGO_TARGET_DIR`
//! - watch `RIALO_RUST_TOOLCHAIN_VERSION`
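//! - watch the local Cargo package graph rooted at the source program: the
//!   workspace `Cargo.toml` and `Cargo.lock`, plus each local package's
//!   manifest, `src/` directory, and `build.rs`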
//! - watch the resolved `.rialo-toolchain` file when that file selected the
//!   toolchain version
//!
//! The package-graph watch set is intentionally broader than an exact compiler
//! dep-info file. That tradeoff keeps the implementation simple and reliable on
//! stable Cargo. If we later want narrower reruns, the likely refinement is to
//! consume the nested build's generated dep-info instead of broadening this API.

use std::{
    collections::{BTreeSet, HashMap, HashSet},
    env, fs,
    path::{Path, PathBuf},
};

use anyhow::{Context, Result};
use cargo_metadata::{Metadata, Package, PackageId};

use crate::compilation;

/// Name of the compile-time environment variable, emitted through
/// `cargo:rustc-env`, that carries the file name of the artifact placed in
/// `OUT_DIR`.
const ARTIFACT_ENV_VAR: &str = "RIALO_BUILD_ARTIFACT_FILE";

/// Create a build-script helper for compiling a PolkaVM artifact from a local
/// Cargo program crate.
pub fn setup_polkavm_artifact_build() -> PolkaVmArtifactBuild {
    PolkaVmArtifactBuild::default()
}

/// Thin build-script adapter over the shared compilation path.
///
/// Both settings start out unset (`Default`); they are filled in through the
/// builder-style methods before calling `run`.
#[derive(Debug, Default)]
pub struct PolkaVmArtifactBuild {
    /// Location of the program crate; `run` joins it onto
    /// `CARGO_MANIFEST_DIR` and fails if it was never set.
    program_path: Option<PathBuf>,
    /// Optional toolchain version that `run` forwards to the compilation
    /// request.
    toolchain_version_override: Option<String>,
}

/// Result of running a build-script artifact build.
#[derive(Debug)]
pub struct BuildScriptResult {
    /// Path to the compiled artifact in `OUT_DIR`.
    ///
    /// This is `OUT_DIR` joined with the file name of the compiled program
    /// binary.
    pub artifact_path: PathBuf,
}

impl PolkaVmArtifactBuild {
    /// Set the source program path relative to the artifact crate.
    pub fn program_path(mut self, program_path: impl Into<PathBuf>) -> Self {
        self.program_path = Some(program_path.into());
        self
    }

    /// Set an explicit Rialo Rust toolchain version override.
    pub fn toolchain_version(mut self, version: impl Into<String>) -> Self {
        self.toolchain_version_override = Some(version.into());
        self
    }

    /// Compile the configured program and emit Cargo build-script directives.
    pub fn run(self) -> Result<BuildScriptResult> {
        emit_rerun_if_env_changed("CARGO_TARGET_DIR");
        emit_rerun_if_env_changed("RIALO_RUST_TOOLCHAIN_VERSION");

        let manifest_dir = env_path("CARGO_MANIFEST_DIR")?;
        let out_dir = env_path("OUT_DIR")?;
        let relative_program_path = self
            .program_path
            .context("PolkaVM artifact build requires a program path")?;
        let program_path = manifest_dir.join(relative_program_path);

        emit_rerun_if_changed_for_local_package_graph(&program_path)?;

        // Resolve the nested build's target directory. When running inside a
        // build script, the outer cargo holds `target/{profile}/.cargo-lock`
        // for the entire build. If the nested cargo targets the same profile
        // directory, it deadlocks waiting for that lock.
        //
        // To avoid this, we place the nested build in a `rialo-build/`
        // subdirectory under the resolved target root. This gives the nested
        // cargo its own lock file while keeping artifacts under `target/` so
        // CI caching picks them up. Registry crates in `~/.cargo/registry`
        // are still shared.
        let base_target_dir = crate::resolve_target_dir_for_program(&program_path, None)?;
        let target_dir_override = base_target_dir.join("rialo-build");

        let result = compilation::compile_program(&compilation::CompileProgramRequest {
            program_path,
            output_dir: out_dir.clone(),
            target_dir_override: Some(target_dir_override),
            toolchain_version_override: self.toolchain_version_override,
        })?;
        if let Some(source_path) = result.resolved_toolchain.source_path.as_deref() {
            emit_rerun_if_changed(source_path);
        }

        let artifact_file_name = result
            .program_binary
            .file_name()
            .context("Compiled program artifact has no file name")?;
        let artifact_path = out_dir.join(artifact_file_name);

        fs::copy(&result.program_binary, &artifact_path).with_context(|| {
            format!(
                "Failed to copy {} to {}",
                result.program_binary.display(),
                artifact_path.display()
            )
        })?;

        println!(
            "cargo:rustc-env={ARTIFACT_ENV_VAR}={}",
            artifact_file_name.to_string_lossy()
        );

        Ok(BuildScriptResult { artifact_path })
    }
}

/// Read the environment variable `var_name` as a filesystem path.
///
/// The raw OS string is used, so the value does not need to be valid UTF-8.
/// Returns an error naming the variable when it is not set.
fn env_path(var_name: &str) -> Result<PathBuf> {
    env::var_os(var_name)
        .map(PathBuf::from)
        .with_context(|| format!("{var_name} is not available in this build script"))
}

/// Print the Cargo directive that reruns the build script whenever the
/// environment variable `var_name` changes.
fn emit_rerun_if_env_changed(var_name: &str) {
    let directive = format!("cargo:rerun-if-env-changed={var_name}");
    println!("{directive}");
}

/// Print the Cargo directive that reruns the build script whenever `path`
/// changes.
///
/// The path is rendered with `Path::display`, which is lossy for non-UTF-8
/// paths.
fn emit_rerun_if_changed(path: &Path) {
    let directive = format!("cargo:rerun-if-changed={}", path.display());
    println!("{directive}");
}

/// Emit one `rerun-if-changed` directive for every watch path derived from the
/// local Cargo package graph rooted at `program_path`.
///
/// Propagates any error from computing the watch set; nothing is emitted in
/// that case.
fn emit_rerun_if_changed_for_local_package_graph(program_path: &Path) -> Result<()> {
    let watch_paths = local_package_graph_watch_paths(program_path)?;
    watch_paths
        .iter()
        .for_each(|path| emit_rerun_if_changed(path));
    Ok(())
}

/// Collect the paths whose changes should trigger a rebuild of the program at
/// `program_path`.
///
/// The result contains, in sorted order and without duplicates:
///
/// - the workspace root `Cargo.toml`, and `Cargo.lock` when it exists
/// - for every local (source-less) package reachable from the program
///   package: its manifest, its `src/` directory when present, and its
///   `build.rs` when present
///
/// Fails when `cargo metadata` cannot be loaded, when the program package
/// cannot be located in it, or when the metadata has no resolved graph.
fn local_package_graph_watch_paths(program_path: &Path) -> Result<Vec<PathBuf>> {
    let program_manifest = program_path.join("Cargo.toml");
    let metadata = cargo_metadata::MetadataCommand::new()
        .manifest_path(program_manifest)
        .exec()
        .with_context(|| {
            format!(
                "Failed to load Cargo metadata for {}",
                program_path.display()
            )
        })?;

    let program_package = root_package(&metadata, program_path)?;
    let reachable_local_ids = local_package_closure(&metadata, &program_package.id)?;

    // A `BTreeSet` both deduplicates and yields a stable, sorted order.
    let mut watch_set: BTreeSet<PathBuf> = BTreeSet::new();

    let workspace_root = metadata.workspace_root.as_std_path();
    watch_set.insert(workspace_root.join("Cargo.toml"));

    let workspace_lockfile = workspace_root.join("Cargo.lock");
    if workspace_lockfile.exists() {
        watch_set.insert(workspace_lockfile);
    }

    for package in &metadata.packages {
        // Skip registry/git packages and anything the program does not reach.
        if package.source.is_some() || !reachable_local_ids.contains(&package.id) {
            continue;
        }

        let manifest_path = package.manifest_path.as_std_path();
        watch_set.insert(manifest_path.to_path_buf());

        let package_dir = manifest_path
            .parent()
            .context("Package manifest path has no parent directory")?;

        // Conventional inputs are only watched when they actually exist.
        watch_set.extend(
            ["src", "build.rs"]
                .iter()
                .map(|entry| package_dir.join(entry))
                .filter(|candidate| candidate.exists()),
        );
    }

    Ok(watch_set.into_iter().collect())
}

fn root_package<'a>(metadata: &'a Metadata, program_path: &Path) -> Result<&'a Package> {
    let manifest_path = program_path
        .join("Cargo.toml")
        .canonicalize()
        .with_context(|| format!("Failed to canonicalize {}", program_path.display()))?;

    metadata
        .packages
        .iter()
        .find(|package| {
            package
                .manifest_path
                .as_std_path()
                .canonicalize()
                .ok()
                .as_ref()
                == Some(&manifest_path)
        })
        .with_context(|| format!("Failed to locate package for {}", program_path.display()))
}

/// Compute the set of package ids reachable from `root_id` by following
/// resolved dependency edges through local packages only.
///
/// `root_id` itself is always part of the result. A dependency is followed
/// only when it is listed in `metadata.packages` and has no `source`, i.e. it
/// is a local path/workspace package.
///
/// Fails when `metadata` carries no resolved dependency graph.
fn local_package_closure(metadata: &Metadata, root_id: &PackageId) -> Result<HashSet<PackageId>> {
    let resolve = metadata
        .resolve
        .as_ref()
        .context("Cargo metadata did not include a resolved package graph")?;

    // Direct dependencies of each resolved node, keyed by the node's id.
    let mut dependencies_of: HashMap<&PackageId, Vec<&PackageId>> =
        HashMap::with_capacity(resolve.nodes.len());
    for node in &resolve.nodes {
        let direct: Vec<&PackageId> = node.deps.iter().map(|dep| &dep.pkg).collect();
        dependencies_of.insert(&node.id, direct);
    }

    let packages_by_id: HashMap<&PackageId, &Package> = metadata
        .packages
        .iter()
        .map(|package| (&package.id, package))
        .collect();

    let is_local = |id: &PackageId| {
        matches!(packages_by_id.get(id), Some(package) if package.source.is_none())
    };

    // Depth-first walk with an explicit stack; `reachable` doubles as the
    // visited set.
    let mut reachable = HashSet::new();
    let mut pending: Vec<&PackageId> = vec![root_id];

    while let Some(current) = pending.pop() {
        if !reachable.insert(current.clone()) {
            continue;
        }

        if let Some(direct) = dependencies_of.get(current) {
            pending.extend(direct.iter().copied().filter(|&id| is_local(id)));
        }
    }

    Ok(reachable)
}

#[cfg(test)]
mod tests {
    use std::fs;

    use super::local_package_graph_watch_paths;

    /// Virtual workspace manifest listing both fixture crates.
    const WORKSPACE_MANIFEST: &str = r#"[workspace]
members = ["app", "dep"]
resolver = "2"
"#;

    /// Manifest of the root program crate, which depends on `dep` by path.
    const APP_MANIFEST: &str = r#"[package]
name = "app"
version = "0.1.0"
edition = "2021"

[dependencies]
dep = { path = "../dep" }
"#;

    /// Manifest of the local dependency crate.
    const DEP_MANIFEST: &str = r#"[package]
name = "dep"
version = "0.1.0"
edition = "2021"
"#;

    /// Builds a two-crate workspace on disk and checks that the watch set for
    /// `app` covers the workspace files plus both local packages' inputs.
    #[test]
    fn local_package_graph_watch_paths_include_local_packages_and_workspace_files() {
        let tempdir = tempfile::tempdir().expect("create tempdir");
        let root = tempdir.path();
        let app_dir = root.join("app");
        let dep_dir = root.join("dep");

        fs::write(root.join("Cargo.toml"), WORKSPACE_MANIFEST)
            .expect("write workspace Cargo.toml");

        fs::create_dir_all(app_dir.join("src")).expect("create app/src");
        fs::write(app_dir.join("Cargo.toml"), APP_MANIFEST).expect("write app Cargo.toml");
        fs::write(app_dir.join("src/lib.rs"), "pub fn app() {}\n").expect("write app lib.rs");

        fs::create_dir_all(dep_dir.join("src")).expect("create dep/src");
        fs::write(dep_dir.join("Cargo.toml"), DEP_MANIFEST).expect("write dep Cargo.toml");
        fs::write(dep_dir.join("src/lib.rs"), "pub fn dep() {}\n").expect("write dep lib.rs");
        fs::write(dep_dir.join("build.rs"), "fn main() {}\n").expect("write dep build.rs");
        fs::write(root.join("Cargo.lock"), "").expect("write Cargo.lock");

        let watched = local_package_graph_watch_paths(&app_dir).expect("collect watched paths");

        for relative in [
            "Cargo.toml",
            "Cargo.lock",
            "app/Cargo.toml",
            "app/src",
            "dep/Cargo.toml",
            "dep/src",
            "dep/build.rs",
        ] {
            let expected = root.join(relative);
            assert!(
                watched.contains(&expected),
                "watch set is missing {}",
                expected.display()
            );
        }
    }
}