// splicer 2.4.1
//
// Plan and generate middleware splice operations for WebAssembly component composition graphs.
// Documentation
//! Tier-1/2 builtins: pre-built wasm components published to an OCI
//! registry. At splice-time [`materialize_into`] resolves a builtin
//! in this order:
//!
//!   1. `$SPLICER_BUILTINS_DIR/<name>.wasm` — local override, intended
//!      for iterating on a builtin without re-publishing. `make
//!      build-builtins` populates `assets/builtins/`, which is the
//!      natural value to point this at.
//!   2. On-disk cache at `<user-cache>/splicer/builtins/<name>@<version>.wasm`.
//!   3. OCI pull from ghcr, populating the cache for next time.

use anyhow::{Context, Result};
use std::path::{Path, PathBuf};

/// Bytes used to stand in for a real builtin in tests. Starts with the
/// wasm magic so any `bytes.starts_with(b"\0asm")` checks downstream
/// stay valid.
///
/// Eight bytes in total: the four-byte `\0asm` magic followed by
/// `01 00 00 00`. That is only a header, not a complete binary, so it
/// is suitable for byte-level plumbing tests only.
// NOTE(review): `01 00 00 00` looks like the core-module version field
// rather than the component-model one — confirm nothing downstream
// tries to parse these bytes as a component.
#[cfg(test)]
pub(crate) const FAKE_BUILTIN_WASM: &[u8] = b"\0asm\x01\x00\x00\x00";

/// Run `f` with `SPLICER_BUILTINS_DIR` pointing at a tmpdir containing
/// fake `<name>.wasm` files for every name in `names`, and return
/// whatever `f` returns. Avoids any cache/network dependency in tests
/// that exercise builtin materialization.
///
/// The `tempfile::TempDir` is a local of this function, so the
/// directory stays on disk for the whole call to `f` and is removed
/// only after the previous env value has been put back.
///
/// The previous value of the env var is restored by a drop guard, so
/// it is put back even when `f` panics (e.g. a failing assertion inside
/// the closure). Without that, a panicking test would leave the
/// override pointing at a deleted directory for every later test in
/// the same process.
///
/// All callers — across modules — share the same lock so parallel
/// tests can't stomp on each other's `SPLICER_BUILTINS_DIR` setting.
///
/// # Panics
///
/// Panics if the tmpdir or any of the fake files cannot be created.
#[cfg(test)]
pub(crate) fn with_fake_builtins<R>(names: &[&str], f: impl FnOnce() -> R) -> R {
    use std::ffi::OsString;
    use std::sync::Mutex;

    /// Puts the env var back to its pre-call state when dropped,
    /// including during unwinding.
    struct RestoreEnv {
        key: &'static str,
        prev: Option<OsString>,
    }

    impl Drop for RestoreEnv {
        fn drop(&mut self) {
            // SAFETY: the guard is created after LOCK is acquired and is
            // dropped before the lock guard (locals drop in reverse
            // declaration order), so the lock is still held here.
            unsafe {
                match self.prev.take() {
                    Some(v) => std::env::set_var(self.key, v),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    static LOCK: Mutex<()> = Mutex::new(());
    // A poisoned lock only means an earlier test panicked; the env var
    // was restored by that call's drop guard, so it is safe to proceed.
    let _guard = LOCK.lock().unwrap_or_else(|p| p.into_inner());

    let dir = tempfile::tempdir().unwrap();
    for name in names {
        std::fs::write(dir.path().join(format!("{name}.wasm")), FAKE_BUILTIN_WASM).unwrap();
    }

    let key = LOCAL_OVERRIDE_ENV;
    // Declared after `dir` and `_guard` so it drops first: env restored,
    // then tmpdir removed, then lock released.
    let _restore = RestoreEnv {
        key,
        prev: std::env::var_os(key),
    };
    // SAFETY: every caller acquires LOCK above before touching the env var.
    unsafe { std::env::set_var(key, dir.path()) };
    f()
}

/// Builtins splicer ships with, paired with the version published to ghcr.
/// Auto-generated by `build.rs` from each `builtins/<name>/Cargo.toml`.
///
/// Each entry is `(name, version)`; the table is `include!`d verbatim
/// from a file in `OUT_DIR`, so it must be a single slice expression.
// NOTE(review): the included file is named `builtin_protocol.rs` although
// it holds the version table — confirm this matches what build.rs emits.
const BUILTIN_VERSIONS: &[(&str, &str)] =
    include!(concat!(env!("OUT_DIR"), "/builtin_protocol.rs"));

/// Registry host that builtins are pulled from. First segment of the
/// OCI reference built by `oci_reference`.
const REGISTRY: &str = "ghcr.io";
/// Repository path under [`REGISTRY`] that holds one repository per
/// builtin; the builtin name is appended as the final path segment.
const REPO_PREFIX: &str = "ejrgilbert/splicer/builtins";

/// Subdirectory under `splits_dir` where materialized builtins are
/// written. Kept separate from sub-component splits so a `make clean`
/// or rerun doesn't tangle the two.
///
/// The same name is reused as the subdirectory of the on-disk cache
/// (`<user-cache>/splicer/builtins/`).
const BUILTIN_SUBDIR: &str = "builtins";

/// Env var that, if set, points at a directory of pre-built `<name>.wasm`
/// files. Used for local iteration; `make build-builtins` writes to
/// `assets/builtins/`, which is the natural value here.
///
/// When set, it takes over resolution entirely: a missing file in that
/// directory is an error rather than a fall-through to cache/registry.
const LOCAL_OVERRIDE_ENV: &str = "SPLICER_BUILTINS_DIR";

/// OCI layer media types we'll accept on pull. We publish with
/// `application/wasm`; the bytecodealliance variant is included so a
/// future republish under a more specific media type still resolves.
const ACCEPTED_LAYER_MEDIA_TYPES: &[&str] = &[
    "application/wasm",
    "application/vnd.bytecodealliance.wasm.component.layer.v0+wasm",
];

/// Look up the version pinned for the builtin called `name`.
///
/// Returns the version of the first matching entry in
/// `BUILTIN_VERSIONS`, or `None` when no entry carries that name.
fn version_for(name: &str) -> Option<&'static str> {
    BUILTIN_VERSIONS
        .iter()
        .find_map(|&(candidate, version)| (candidate == name).then_some(version))
}

/// Internal-only builtin names — built and shipped like other builtins
/// (so the publish + materialize plumbing applies) but not exposed for
/// users to reference in YAML. The `splicer:builtin-config` provider
/// template is the only such builtin today: splicer materializes a
/// patched copy on demand whenever a user-facing builtin imports the
/// config substrate. Each entry's canonical name lives in the module
/// that owns it.
///
/// Enforced in two places in this module: `known_names` filters these
/// names out of the user-visible list, and `materialize_into` refuses
/// them outright. `load_resolved_bytes` deliberately does not check.
const INTERNAL_BUILTINS: &[&str] = &[crate::config_provider::PROVIDER_BUILTIN_NAME];

/// Whether `name` is one of the internal-only builtins that users may
/// not reference directly.
fn is_internal(name: &str) -> bool {
    INTERNAL_BUILTINS.iter().any(|&internal| internal == name)
}

/// List every builtin a user may reference, in sorted order.
///
/// Walks `BUILTIN_VERSIONS`, drops the internal-only names (see
/// [`INTERNAL_BUILTINS`]) and sorts what is left. The result is what
/// the "unknown builtin" and "internal builtin" errors print as the
/// set of available names.
pub fn known_names() -> Vec<&'static str> {
    let mut user_facing = Vec::new();
    for &(name, _) in BUILTIN_VERSIONS {
        if !is_internal(name) {
            user_facing.push(name);
        }
    }
    user_facing.sort();
    user_facing
}

/// Load a tier-1/2 builtin and pull out its embedded manifest, if any.
///
/// # Errors
///
/// Fails when the builtin's bytes cannot be resolved, or when manifest
/// extraction itself reports an error. A builtin without a manifest
/// section (one that predates the manifest substrate) is not an error:
/// it yields `Ok(None)`.
pub fn manifest_for(name: &str) -> Result<Option<builtin_protocol::Manifest>> {
    let wasm = load_resolved_bytes(name)?;
    match builtin_protocol::extract_for_builtin(&wasm, name) {
        Ok(manifest) => Ok(manifest),
        Err(e) => Err(anyhow::anyhow!("manifest extraction failed: {e}")),
    }
}

/// Resolve a single tier-1/2 builtin's manifest. Errors when the
/// builtin name is unknown, the bytes can't be fetched, or no
/// matching manifest section is present.
pub fn resolve_manifest(name: &str) -> Result<builtin_protocol::Manifest> {
    let bytes = load_resolved_bytes(name)?;
    builtin_protocol::extract_for_builtin(&bytes, name)
        .map_err(|e| anyhow::anyhow!("manifest extraction failed: {e}"))?
        .ok_or_else(|| {
            anyhow::anyhow!(
                "builtin '{name}' carries no embedded `splicer-builtin-manifest/{name}` \
                 section. The builtin pre-dates manifests, or was built without \
                 splicer-builtin-protocol's `build_helper::codegen` in its build.rs."
            )
        })
}

/// Resolve the named builtin's bytes (override → cache → OCI pull).
/// Used both by [`materialize_into`] (which writes them to disk) and
/// by the config-provider patcher (which patches them in memory before
/// writing). Treats internal-only builtins the same as user-facing
/// ones — they ship through the same channels.
pub(crate) fn load_resolved_bytes(name: &str) -> Result<Vec<u8>> {
    let version = version_for(name).ok_or_else(|| {
        anyhow::anyhow!(
            "unknown builtin '{name}'. Available: [{}]",
            known_names().join(", ")
        )
    })?;
    load_bytes(name, version).with_context(|| format!("Failed to load builtin '{name}@{version}'"))
}

/// Resolve the named builtin's bytes (override → cache → OCI pull) and
/// write them to `<splits_dir>/builtins/<name>.wasm`, creating the
/// `builtins` directory if needed.
///
/// Returns the path that was written. It is built by joining onto
/// `splits_dir`, so it is absolute exactly when `splits_dir` is.
///
/// # Errors
///
/// Rejects internal-only builtin names up front, so a YAML rule can't
/// smuggle one in via `builtin: <internal-name>`. Otherwise fails when
/// the bytes cannot be resolved, the directory cannot be created, or
/// the file cannot be written.
pub fn materialize_into(splits_dir: &Path, name: &str) -> Result<PathBuf> {
    if is_internal(name) {
        return Err(anyhow::anyhow!(
            "'{name}' is an internal builtin and cannot be referenced from YAML. \
             Available: [{}]",
            known_names().join(", ")
        ));
    }
    let wasm = load_resolved_bytes(name)?;

    let builtins_dir = splits_dir.join(BUILTIN_SUBDIR);
    std::fs::create_dir_all(&builtins_dir)
        .with_context(|| format!("Failed to create builtins dir: {}", builtins_dir.display()))?;

    let mut dest = builtins_dir;
    dest.push(format!("{name}.wasm"));
    std::fs::write(&dest, wasm)
        .with_context(|| format!("Failed to write builtin to: {}", dest.display()))?;
    Ok(dest)
}

fn load_bytes(name: &str, version: &str) -> Result<Vec<u8>> {
    if let Some(dir) = std::env::var_os(LOCAL_OVERRIDE_ENV) {
        let path = Path::new(&dir).join(format!("{name}.wasm"));
        return std::fs::read(&path).with_context(|| {
            format!(
                "{LOCAL_OVERRIDE_ENV}={} is set, but couldn't read {}",
                Path::new(&dir).display(),
                path.display()
            )
        });
    }

    let cache_path = cache_path_for(name, version)?;
    if cache_path.exists() {
        return std::fs::read(&cache_path)
            .with_context(|| format!("Failed to read cache: {}", cache_path.display()));
    }

    let bytes = pull_from_registry(name, version)
        .with_context(|| pull_error_hint(name, version, &cache_path))?;

    if let Some(parent) = cache_path.parent() {
        std::fs::create_dir_all(parent)
            .with_context(|| format!("Failed to create cache dir: {}", parent.display()))?;
    }
    write_cache_atomically(&cache_path, &bytes)?;
    Ok(bytes)
}

/// Write `bytes` to `path` via a sibling tempfile + rename so two
/// concurrent splicer processes that both miss the cache can't observe
/// a torn write. The `.<pid>.tmp` suffix isolates per-process tempfiles
/// so concurrent writers never share one.
///
/// The tempfile name is derived with `with_extension`, which replaces
/// the final extension of `path`; callers are expected to pass a path
/// ending in `.wasm`, as `cache_path_for` produces.
///
/// # Errors
///
/// Fails when the tempfile cannot be written or the rename fails. In
/// either case the tempfile is removed on a best-effort basis, so a
/// failed attempt does not leave a stray `.tmp` file in the cache
/// directory.
fn write_cache_atomically(path: &Path, bytes: &[u8]) -> Result<()> {
    let tmp = path.with_extension(format!("wasm.{}.tmp", std::process::id()));

    let outcome = std::fs::write(&tmp, bytes)
        .with_context(|| format!("Failed to write cache tempfile: {}", tmp.display()))
        .and_then(|()| {
            std::fs::rename(&tmp, path).with_context(|| {
                format!(
                    "Failed to rename cache tempfile {} to {}",
                    tmp.display(),
                    path.display()
                )
            })
        });

    if outcome.is_err() {
        // Best-effort: the original error is what the caller needs to
        // see, so a failure to clean up is deliberately ignored. The
        // tempfile may not exist at all if the write never created it.
        let _ = std::fs::remove_file(&tmp);
    }
    outcome
}

/// Compute where `name` at `version` lives in the on-disk cache:
/// `<user-cache>/splicer/builtins/<name>@<version>.wasm`.
///
/// Only builds the path; nothing is created or checked on disk.
///
/// # Errors
///
/// Fails when no user cache directory is available, with a hint to use
/// the local-override env var instead.
fn cache_path_for(name: &str, version: &str) -> Result<PathBuf> {
    let cache_root = super::user_cache_dir().context(
        "no user cache directory available; \
         set SPLICER_BUILTINS_DIR to a directory of pre-built .wasm files",
    )?;
    let file_name = format!("{name}@{version}.wasm");
    let builtins_cache = cache_root.join("splicer").join(BUILTIN_SUBDIR);
    Ok(builtins_cache.join(file_name))
}

/// Build the OCI reference string for a builtin:
/// `<registry>/<repo-prefix>/<name>:<version>`.
fn oci_reference(name: &str, version: &str) -> String {
    let repository = [REGISTRY, REPO_PREFIX, name].join("/");
    format!("{repository}:{version}")
}

/// Render the context message attached to a failed registry pull: the
/// builtin and reference that were tried, the cache path that was
/// empty, and how to fall back to a local build.
fn pull_error_hint(name: &str, version: &str, cache_path: &Path) -> String {
    let reference = oci_reference(name, version);
    let cache = cache_path.display();
    format!(
        "could not fetch builtin '{name}@{version}' from {reference} \
         (not present in cache at {cache}). \
         To use a local build instead, set {LOCAL_OVERRIDE_ENV}=<dir-of-wasm>."
    )
}

/// Pull `name` at `version` from the registry anonymously and return
/// the bytes of the first layer of the pulled image.
///
/// The pull is async; it is driven to completion on a current-thread
/// tokio runtime built just for this call, so the function itself is
/// blocking.
///
/// # Errors
///
/// Fails when the reference cannot be parsed, the runtime cannot be
/// built, the pull fails, or the pulled image has no layers.
fn pull_from_registry(name: &str, version: &str) -> Result<Vec<u8>> {
    use oci_client::client::ClientConfig;
    use oci_client::secrets::RegistryAuth;
    use oci_client::{Client, Reference};

    let image_ref = oci_reference(name, version)
        .parse::<Reference>()
        .context("internal: built an unparseable OCI reference")?;
    let registry = Client::new(ClientConfig::default());
    let anonymous = RegistryAuth::Anonymous;

    let rt = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .context("failed to build tokio runtime for OCI pull")?;

    let accepted = ACCEPTED_LAYER_MEDIA_TYPES.to_vec();
    let pulled = rt
        .block_on(registry.pull(&image_ref, &anonymous, accepted))
        .context("OCI pull failed")?;

    // Only the first layer is used; any further layers are ignored.
    match pulled.layers.into_iter().next() {
        Some(first) => Ok(first.data.to_vec()),
        None => Err(anyhow::anyhow!("OCI manifest had zero layers")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The user-facing list includes the seed builtin and comes back
    /// already sorted.
    #[test]
    fn known_names_sorted_and_contains_seed() {
        let listed = known_names();
        assert!(listed.contains(&"hello-tier1"));
        // The unknown-builtin error prints this list as-is, so it must
        // already be in sorted order.
        let expected = {
            let mut copy = listed.clone();
            copy.sort();
            copy
        };
        assert_eq!(listed, expected);
    }

    /// An unknown name is rejected with an error that names it and
    /// lists what is available.
    #[test]
    fn unknown_builtin_lists_available() {
        let splits_dir = tempfile::tempdir().unwrap();
        let failure = materialize_into(splits_dir.path(), "no-such").unwrap_err();
        let rendered = format!("{failure:#}");
        for needle in ["unknown builtin 'no-such'", "hello-tier1"] {
            assert!(rendered.contains(needle), "{rendered}");
        }
    }

    /// With `SPLICER_BUILTINS_DIR` set, materialize_into should read
    /// `<dir>/<name>.wasm` and write it under splits_dir, never
    /// touching cache or network.
    #[test]
    fn local_override_short_circuits_fetch() {
        let splits_dir = tempfile::tempdir().unwrap();
        let outcome = with_fake_builtins(&["hello-tier1"], || {
            materialize_into(splits_dir.path(), "hello-tier1")
        });
        let written = outcome.expect("materialize");

        assert!(written.ends_with("builtins/hello-tier1.wasm"));
        let on_disk = std::fs::read(&written).unwrap();
        assert_eq!(on_disk, FAKE_BUILTIN_WASM);
    }

    /// With the override pointing at an empty directory, the error
    /// names both the env var and the file that was expected.
    #[test]
    fn local_override_missing_file_errors_clearly() {
        let splits_dir = tempfile::tempdir().unwrap();
        let outcome =
            with_fake_builtins(&[], || materialize_into(splits_dir.path(), "hello-tier1"));
        let rendered = format!("{:#}", outcome.unwrap_err());
        for needle in [LOCAL_OVERRIDE_ENV, "hello-tier1.wasm"] {
            assert!(rendered.contains(needle), "{rendered}");
        }
    }

    /// End-to-end fetch from ghcr — gated as ignored because it needs
    /// network. Run with `cargo test -- --ignored` to verify the OCI
    /// pull path manually.
    #[test]
    #[ignore = "hits ghcr.io"]
    fn fetch_from_registry_smoke() {
        let splits_dir = tempfile::tempdir().unwrap();
        let fetched_path = materialize_into(splits_dir.path(), "hello-tier1").expect("fetch");
        let fetched = std::fs::read(&fetched_path).unwrap();
        let head = &fetched[..fetched.len().min(8)];
        assert!(
            fetched.starts_with(b"\0asm"),
            "fetched bytes are wasm magic, got {head:?}"
        );
    }
}