splicer 2.3.0

Plan and generate middleware splice operations for WebAssembly component composition graphs.
Documentation
//! Registry-backed catalogue of middleware components shipped with splicer.
//!
//! Each builtin is published as an OCI artifact under
//! `ghcr.io/ejrgilbert/splicer/builtins/<name>:<version>` by the
//! `publish-builtin.yml` workflow. At runtime, [`materialize_into`]
//! resolves a builtin in this order:
//!
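//!   1. `$SPLICER_BUILTINS_DIR/<name>.wasm` — local override, intended
//!      for iterating on a builtin without re-publishing. `make
//!      build-builtins` populates `assets/builtins/`, which is the
//!      natural value to point this at. When the variable is set it is
//!      authoritative: a missing or unreadable file is an error, with
//!      no fall-through to the cache or the registry.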
//!   2. On-disk cache at `<user-cache>/splicer/builtins/<name>@<version>.wasm`.
//!   3. OCI pull from ghcr, populating the cache for next time.
//!
//! Builtins are referenced from the splice config YAML as
//! `inject: [{ builtin: <name> }]`. The parser populates
//! [`crate::parse::config::Injection::builtin`] with the name. Before
//! contract validation runs, the splice pipeline calls
//! [`materialize_into`], which writes the resolved bytes to
//! `<splits-dir>/builtins/<name>.wasm`; from then on the rest of the
//! pipeline treats the injection like any other path-backed middleware.

use anyhow::{Context, Result};
use std::path::{Path, PathBuf};

/// Bytes used to stand in for a real builtin in tests. Starts with the
/// wasm magic so any `bytes.starts_with(b"\0asm")` checks downstream
/// stay valid.
///
/// Eight bytes in total: the four-byte `\0asm` magic followed by
/// `01 00 00 00`. It is only a header, not a complete component, so it
/// is suitable for tests that copy or sniff the bytes rather than
/// parse them.
#[cfg(test)]
pub(crate) const FAKE_BUILTIN_WASM: &[u8] = b"\0asm\x01\x00\x00\x00";

/// Run `f` with `SPLICER_BUILTINS_DIR` pointing at a tmpdir containing
/// a fake `<name>.wasm` file for every name in `names`, then put the
/// variable back the way it was. Avoids any cache/network dependency
/// in tests that exercise builtin materialization.
///
/// The `tempfile::TempDir` is held in a local for the whole call, so
/// the directory exists for as long as `f` runs and is removed on the
/// way out.
///
/// The previous value of the variable is restored by a drop guard, so
/// it is restored even when `f` panics (e.g. a failing assertion inside
/// the closure). Without that, every later test in the process would
/// see the override pointing at a deleted directory.
///
/// All callers — across modules — share the same lock so parallel
/// tests can't stomp on each other's `SPLICER_BUILTINS_DIR` setting.
///
/// # Panics
///
/// Panics if the tempdir or one of the fake files cannot be created.
#[cfg(test)]
pub(crate) fn with_fake_builtins<R>(names: &[&str], f: impl FnOnce() -> R) -> R {
    use std::sync::Mutex;

    /// Restores `SPLICER_BUILTINS_DIR` to its captured value on drop.
    struct RestoreEnv {
        previous: Option<std::ffi::OsString>,
    }

    impl Drop for RestoreEnv {
        fn drop(&mut self) {
            // SAFETY: this guard is declared after the LOCK guard and is
            // therefore dropped before it (also during unwinding), so the
            // lock that serializes every caller is still held here.
            unsafe {
                match self.previous.take() {
                    Some(value) => std::env::set_var(LOCAL_OVERRIDE_ENV, value),
                    None => std::env::remove_var(LOCAL_OVERRIDE_ENV),
                }
            }
        }
    }

    static LOCK: Mutex<()> = Mutex::new(());
    // A poisoned lock only means an earlier test panicked while holding
    // it; the protected state is `()`, so it is safe to keep going.
    let _guard = LOCK.lock().unwrap_or_else(|p| p.into_inner());

    let dir = tempfile::tempdir().unwrap();
    for name in names {
        std::fs::write(dir.path().join(format!("{name}.wasm")), FAKE_BUILTIN_WASM).unwrap();
    }

    // Capture the old value *before* overwriting it; the guard puts it
    // back when this function returns or unwinds.
    let _restore = RestoreEnv {
        previous: std::env::var_os(LOCAL_OVERRIDE_ENV),
    };
    // SAFETY: every caller acquires LOCK above before touching the env var.
    unsafe { std::env::set_var(LOCAL_OVERRIDE_ENV, dir.path()) };

    // Locals drop in reverse order after `f` returns: env restored, then
    // the tempdir removed, then the lock released.
    f()
}

/// Builtins splicer ships with, paired with the version published to ghcr.
/// Auto-generated by `build.rs` from each `builtins/<name>/Cargo.toml`.
/// Each entry is a `(name, version)` pair.
const BUILTIN_VERSIONS: &[(&str, &str)] =
    include!(concat!(env!("OUT_DIR"), "/builtin_manifest.rs"));

/// Registry host that builtins are pulled from; the first segment of
/// the reference built by `oci_reference`.
const REGISTRY: &str = "ghcr.io";
/// Repository path under `REGISTRY`; `oci_reference` appends
/// `/<name>:<version>` to it.
const REPO_PREFIX: &str = "ejrgilbert/splicer/builtins";

/// Subdirectory under `splits_dir` where materialized builtins are
/// written. Kept separate from sub-component splits so a `make clean`
/// or rerun doesn't tangle the two.
///
/// The same name is reused by `cache_path_for` as the directory under
/// `<user-cache>/splicer/` that holds cached builtins.
const BUILTIN_SUBDIR: &str = "builtins";

/// Env var that, if set, points at a directory of pre-built `<name>.wasm`
/// files. Used for local iteration; `make build-builtins` writes to
/// `assets/builtins/`, which is the natural value here.
const LOCAL_OVERRIDE_ENV: &str = "SPLICER_BUILTINS_DIR";

/// OCI layer media types we'll accept on pull. We publish with
/// `application/wasm`; the bytecodealliance variant is included so a
/// future republish under a more specific media type still resolves.
const ACCEPTED_LAYER_MEDIA_TYPES: &[&str] = &[
    "application/wasm",
    "application/vnd.bytecodealliance.wasm.component.layer.v0+wasm",
];

/// Look up the published version of the builtin called `name`.
///
/// Returns `None` when `name` is not in the generated manifest. If the
/// manifest ever listed a name twice, the first entry wins.
fn version_for(name: &str) -> Option<&'static str> {
    BUILTIN_VERSIONS
        .iter()
        .find_map(|&(candidate, version)| (candidate == name).then_some(version))
}

/// Names of every builtin shipped with this splicer build, sorted.
/// Used to render a helpful error when YAML references an unknown
/// builtin.
pub fn known_names() -> Vec<&'static str> {
    let mut names: Vec<&'static str> = BUILTIN_VERSIONS.iter().map(|(n, _)| *n).collect();
    names.sort();
    names
}

/// Resolve the named builtin's bytes (override → cache → OCI pull) and
/// write them to `<splits_dir>/builtins/<name>.wasm`. Returns the
/// resulting absolute path.
pub fn materialize_into(splits_dir: &Path, name: &str) -> Result<PathBuf> {
    let version = version_for(name).ok_or_else(|| {
        anyhow::anyhow!(
            "unknown builtin '{name}'. Available: [{}]",
            known_names().join(", ")
        )
    })?;

    let bytes = load_bytes(name, version)
        .with_context(|| format!("Failed to load builtin '{name}@{version}'"))?;

    let dir = splits_dir.join(BUILTIN_SUBDIR);
    std::fs::create_dir_all(&dir)
        .with_context(|| format!("Failed to create builtins dir: {}", dir.display()))?;
    let out = dir.join(format!("{name}.wasm"));
    std::fs::write(&out, &bytes)
        .with_context(|| format!("Failed to write builtin to: {}", out.display()))?;
    Ok(out)
}

fn load_bytes(name: &str, version: &str) -> Result<Vec<u8>> {
    if let Some(dir) = std::env::var_os(LOCAL_OVERRIDE_ENV) {
        let path = Path::new(&dir).join(format!("{name}.wasm"));
        return std::fs::read(&path).with_context(|| {
            format!(
                "{LOCAL_OVERRIDE_ENV}={} is set, but couldn't read {}",
                Path::new(&dir).display(),
                path.display()
            )
        });
    }

    let cache_path = cache_path_for(name, version)?;
    if cache_path.exists() {
        return std::fs::read(&cache_path)
            .with_context(|| format!("Failed to read cache: {}", cache_path.display()));
    }

    let bytes = pull_from_registry(name, version)
        .with_context(|| pull_error_hint(name, version, &cache_path))?;

    if let Some(parent) = cache_path.parent() {
        std::fs::create_dir_all(parent)
            .with_context(|| format!("Failed to create cache dir: {}", parent.display()))?;
    }
    write_cache_atomically(&cache_path, &bytes)?;
    Ok(bytes)
}

/// Write `bytes` to `path` via a sibling tempfile + rename so two
/// concurrent splicer processes that both miss the cache can't observe
/// a torn write. The `.<pid>.tmp` suffix gives each process its own
/// tempfile, so concurrent writers never share one.
///
/// If either the write or the rename fails, the tempfile is removed on
/// a best-effort basis so failed attempts don't pile up in the cache
/// directory.
///
/// # Errors
///
/// Returns the write or rename error, annotated with the paths
/// involved. A failure to clean up the tempfile is ignored.
fn write_cache_atomically(path: &Path, bytes: &[u8]) -> Result<()> {
    let tmp = path.with_extension(format!("wasm.{}.tmp", std::process::id()));

    let outcome = std::fs::write(&tmp, bytes)
        .with_context(|| format!("Failed to write cache tempfile: {}", tmp.display()))
        .and_then(|()| {
            std::fs::rename(&tmp, path).with_context(|| {
                format!(
                    "Failed to rename cache tempfile {} to {}",
                    tmp.display(),
                    path.display()
                )
            })
        });

    if outcome.is_err() {
        // Best effort: the file may be partially written or may not exist
        // at all, and the original error is the one worth reporting.
        let _ = std::fs::remove_file(&tmp);
    }
    outcome
}

/// Location of the cached copy of builtin `name` at `version`:
/// `<user-cache>/splicer/builtins/<name>@<version>.wasm`.
///
/// Only computes the path; nothing is created or checked on disk.
///
/// # Errors
///
/// Fails when no user cache directory can be determined.
fn cache_path_for(name: &str, version: &str) -> Result<PathBuf> {
    let mut path = user_cache_dir().context(
        "no user cache directory available; \
         set SPLICER_BUILTINS_DIR to a directory of pre-built .wasm files",
    )?;
    path.push("splicer");
    path.push(BUILTIN_SUBDIR);
    path.push(format!("{name}@{version}.wasm"));
    Ok(path)
}

/// User cache directory: `$XDG_CACHE_HOME` or `~/.cache` on non-Windows
/// targets, `%LOCALAPPDATA%` on Windows.
///
/// An environment variable that is set but empty is treated as unset,
/// and a relative `$XDG_CACHE_HOME` is ignored, as the XDG Base
/// Directory specification requires. Otherwise the cache would silently
/// land in a path relative to the current working directory.
///
/// Returns `None` when no usable variable is available.
fn user_cache_dir() -> Option<PathBuf> {
    // Set-but-empty counts as "not set".
    let non_empty = |key: &str| std::env::var_os(key).filter(|value| !value.is_empty());

    if cfg!(target_os = "windows") {
        non_empty("LOCALAPPDATA").map(PathBuf::from)
    } else {
        non_empty("XDG_CACHE_HOME")
            .map(PathBuf::from)
            // The spec says relative values are invalid and must be ignored.
            .filter(|dir| dir.is_absolute())
            .or_else(|| non_empty("HOME").map(|home| Path::new(&home).join(".cache")))
    }
}

/// Build the OCI reference for a builtin:
/// `<registry>/<repo-prefix>/<name>:<version>`.
fn oci_reference(name: &str, version: &str) -> String {
    let repository = [REGISTRY, REPO_PREFIX, name].join("/");
    format!("{repository}:{version}")
}

/// Context message attached to a failed registry pull: names the
/// reference that was tried, the cache path that was empty, and the
/// env var that lets the user supply a local build instead.
fn pull_error_hint(name: &str, version: &str, cache_path: &Path) -> String {
    let reference = oci_reference(name, version);
    let cache_location = cache_path.display();
    format!(
        "could not fetch builtin '{name}@{version}' from {} \
         (not present in cache at {}). \
         To use a local build instead, set {LOCAL_OVERRIDE_ENV}=<dir-of-wasm>.",
        reference, cache_location,
    )
}

fn pull_from_registry(name: &str, version: &str) -> Result<Vec<u8>> {
    use oci_client::client::ClientConfig;
    use oci_client::secrets::RegistryAuth;
    use oci_client::{Client, Reference};

    let reference: Reference = oci_reference(name, version)
        .parse()
        .context("internal: built an unparseable OCI reference")?;
    let client = Client::new(ClientConfig::default());
    let auth = RegistryAuth::Anonymous;

    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .context("failed to build tokio runtime for OCI pull")?;

    let image_data = runtime
        .block_on(async {
            client
                .pull(&reference, &auth, ACCEPTED_LAYER_MEDIA_TYPES.to_vec())
                .await
        })
        .context("OCI pull failed")?;

    let layer = image_data
        .layers
        .into_iter()
        .next()
        .ok_or_else(|| anyhow::anyhow!("OCI manifest had zero layers"))?;
    Ok(layer.data.to_vec())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The manifest contains the seed builtin and `known_names` hands
    /// the names back in ascending order.
    #[test]
    fn known_names_sorted_and_contains_seed() {
        let listed = known_names();
        assert!(listed.iter().any(|name| *name == "hello-tier1"));
        // Sorted invariant — relied on by the unknown-builtin error.
        let mut expected = listed.clone();
        expected.sort();
        assert_eq!(listed, expected);
    }

    /// An unknown name is rejected with a message that names the
    /// offender and lists what is available.
    #[test]
    fn unknown_builtin_lists_available() {
        let splits_dir = tempfile::tempdir().unwrap();
        let failure = materialize_into(splits_dir.path(), "no-such").unwrap_err();
        let rendered = format!("{failure:#}");
        for needle in ["unknown builtin 'no-such'", "hello-tier1"] {
            assert!(rendered.contains(needle), "{rendered}");
        }
    }

    /// With `SPLICER_BUILTINS_DIR` set, materialize_into should read
    /// `<dir>/<name>.wasm` and write it under splits_dir, never
    /// touching cache or network.
    #[test]
    fn local_override_short_circuits_fetch() {
        let splits_dir = tempfile::tempdir().unwrap();
        let outcome = with_fake_builtins(&["hello-tier1"], || {
            materialize_into(splits_dir.path(), "hello-tier1")
        });
        let written = outcome.expect("materialize");

        assert!(written.ends_with("builtins/hello-tier1.wasm"));
        let on_disk = std::fs::read(&written).unwrap();
        assert_eq!(on_disk, FAKE_BUILTIN_WASM);
    }

    /// An override directory that lacks the requested file produces an
    /// error naming both the env var and the missing file.
    #[test]
    fn local_override_missing_file_errors_clearly() {
        let splits_dir = tempfile::tempdir().unwrap();
        let outcome =
            with_fake_builtins(&[], || materialize_into(splits_dir.path(), "hello-tier1"));
        let rendered = format!("{:#}", outcome.unwrap_err());
        for needle in [LOCAL_OVERRIDE_ENV, "hello-tier1.wasm"] {
            assert!(rendered.contains(needle), "{rendered}");
        }
    }

    /// End-to-end fetch from ghcr — gated as ignored because it needs
    /// network. Run with `cargo test -- --ignored` to verify the OCI
    /// pull path manually.
    #[test]
    #[ignore = "hits ghcr.io"]
    fn fetch_from_registry_smoke() {
        let splits_dir = tempfile::tempdir().unwrap();
        let written = materialize_into(splits_dir.path(), "hello-tier1").expect("fetch");
        let fetched = std::fs::read(&written).unwrap();
        // Show at most the first eight bytes if the magic is wrong.
        let head = fetched.get(..8).unwrap_or(&fetched[..]);
        assert!(
            fetched.starts_with(b"\0asm"),
            "fetched bytes are wasm magic, got {:?}",
            head
        );
    }
}