vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
// Build-time operation registry and module-tree generation for vyre core.

// ARCHITECTURE: keep this build script aligned with ../ARCHITECTURE_ZERO_CONFLICT.md.
use std::collections::{HashMap, HashSet};
use std::env;
use std::fs::{self, Metadata};
use std::path::{Path, PathBuf};

#[cfg(unix)]
use std::os::unix::fs::MetadataExt;

use toml::Value;
use vyre_build_scan::{scan_rust_specs, RustSpecRegistry};

mod module_tree;

/// Deepest recursion level `walk_ops_tree` will enter; the ops root itself is level 0 and
/// any directory beyond this level fails the build.
const MAX_WALK_DEPTH: usize = 16;
/// Largest `spec.toml` size in bytes (1 MiB) accepted by the tree walk and by
/// `load_spec_identity`.
const MAX_SPEC_BYTES: u64 = 1_048_576;
/// Name of the environment variable that, when set to any value, allows op ids starting
/// with the reserved `internal.` or `test.` prefixes (see `validate_reserved_prefix`).
const RESERVED_ID_ENV: &str = "VYRE_MAINTAINER_ONLY_ALLOW_RESERVED_OP_IDS";

/// Identity of a directory used to detect revisits during the walk.
/// On Unix this is the `(device, inode)` pair produced by `file_key`.
#[cfg(unix)]
type FileKey = (u64, u64);

/// Identity of a directory used to detect revisits during the walk.
/// Off Unix this is the canonicalized path produced by `file_key`.
#[cfg(not(unix))]
type FileKey = PathBuf;

/// Build-script entry point.
///
/// Delegates to `run` and panics with the returned message when any step fails,
/// which makes Cargo abort the build and print the `Fix: ...` text.
fn main() {
    match run() {
        Ok(()) => {}
        Err(error) => panic!("{error}"),
    }
}

fn run() -> Result<(), String> {
    let manifest_dir = env::var("CARGO_MANIFEST_DIR").map_err(|error| {
        format!("Fix: set CARGO_MANIFEST_DIR for build script execution: {error}")
    })?;
    let manifest_path = PathBuf::from(manifest_dir);
    let ops_root = manifest_path.join("src/ops");
    println!("cargo:rerun-if-changed={}", ops_root.display());

    let specs = discover_specs(&ops_root)?;
    let out_dir = env::var("OUT_DIR")
        .map_err(|error| format!("Fix: set OUT_DIR for generated walked_ops.rs: {error}"))?;
    let out_path = PathBuf::from(out_dir);
    write_generated(&out_path, "walked_ops.rs", render_walked_ops(&specs))?;
    scan_rust_specs(&RustSpecRegistry {
        scan_dirs: &["src/ops", "src/ir/transform/compiler"],
        const_name: "GENERATED_REGISTRY",
        output_file: "ops_registry.rs",
    });

    let leaves = module_tree::discover_module_leaves(&manifest_path.join("src"))?;
    write_generated(
        &out_path,
        "vyre_module_tree.rs",
        module_tree::render_module_tree(&leaves),
    )?;

    Ok(())
}

/// Writes `contents` to the file called `name` inside the directory `out_path`.
///
/// # Errors
/// Returns a `Fix: ...` message naming the full target path when the write fails.
fn write_generated(out_path: &Path, name: &str, contents: String) -> Result<(), String> {
    let target = out_path.join(name);
    match fs::write(&target, contents) {
        Ok(()) => Ok(()),
        Err(error) => Err(format!(
            "Fix: ensure build output directory is writable at {}: {error}",
            target.display()
        )),
    }
}

/// Collects `(id, archetype)` pairs from the `spec.toml` files found under `ops_root`.
///
/// The tree is walked with `walk_ops_tree`, the resulting spec paths are sorted, and each
/// spec is kept only when a kernel file exists for it: either `kernel.rs` beside the spec
/// or, for a spec inside a `metadata` directory, `implementation/kernel.rs` in the parent
/// directory. Specs without a kernel file are skipped without being parsed.
///
/// # Errors
/// Every error carries actionable `Fix: ...` text. Failures include: `ops_root` not being a
/// real directory, any walk failure, an unreadable or malformed spec, a duplicate op id,
/// an id with disallowed characters, and a reserved id prefix used without the opt-in
/// environment variable.
pub fn discover_specs(ops_root: &Path) -> Result<Vec<(String, String)>, String> {
    let root_metadata = checked_metadata(ops_root)?;
    if !root_metadata.is_dir() {
        return Err(format!(
            "Fix: make ops root a real directory at {}",
            ops_root.display()
        ));
    }

    let root_key = file_key(ops_root, &root_metadata)?;
    let mut state = WalkState::new(root_key);
    walk_ops_tree(ops_root, ops_root, 0, &mut state)?;

    // Sorting makes the output independent of directory iteration order.
    let mut spec_paths = state.spec_paths;
    spec_paths.sort();

    let mut first_declared_at = HashMap::new();
    let mut discovered = Vec::new();
    for spec_path in spec_paths {
        let spec_dir = match spec_path.parent() {
            Some(dir) => dir,
            None => {
                return Err(format!(
                    "Fix: place spec.toml inside an op directory: {}",
                    spec_path.display()
                ));
            }
        };

        let colocated = spec_dir.join("kernel.rs");
        let in_metadata_dir = spec_dir.file_name().is_some_and(|name| name == "metadata");
        let kernel_path = if colocated.is_file() || !in_metadata_dir {
            colocated
        } else {
            // Spec sits in `<op>/metadata/`; look for the kernel in `<op>/implementation/`.
            match spec_dir.parent() {
                Some(op_dir) => op_dir.join("implementation/kernel.rs"),
                None => colocated,
            }
        };
        // In single-file layout, kernel code lives in the parent .rs file.
        if !kernel_path.is_file() {
            continue;
        }

        let (id, archetype) = load_spec_identity(&spec_path)?;
        // The duplicate check runs before the id format checks, as in the original ordering.
        if let Some(earlier_path) = first_declared_at.insert(id.clone(), spec_path.clone()) {
            return Err(format!(
                "Fix: remove duplicate op id `{id}` from {}; it is already declared by {}",
                spec_path.display(),
                earlier_path.display()
            ));
        }
        validate_op_id(&id, &spec_path)?;
        validate_reserved_prefix(&id, &spec_path)?;
        discovered.push((id, archetype));
    }

    Ok(discovered)
}

/// Mutable bookkeeping shared across the recursive walk of the ops tree.
struct WalkState {
    /// Keys of directories already entered; a repeated key is reported as a cycle or alias.
    visited_dirs: HashSet<FileKey>,
    /// ASCII-lowercased path relative to the ops root, mapped to the directory path that
    /// produced it; used to detect case-only collisions.
    casefold_paths: HashMap<String, PathBuf>,
    /// Paths of every regular `spec.toml` file accepted by the walk.
    spec_paths: Vec<PathBuf>,
}

impl WalkState {
    /// Creates empty walk state with the root directory's key already recorded as visited.
    fn new(root_key: FileKey) -> Self {
        let mut visited_dirs = HashSet::new();
        visited_dirs.insert(root_key);
        Self {
            visited_dirs,
            casefold_paths: HashMap::new(),
            spec_paths: Vec::new(),
        }
    }
}

/// Recursively walks `dir`, recording each regular `spec.toml` file in `state.spec_paths`.
///
/// `ops_root` is the top of the walk and is used for error text and relative-path checks;
/// `depth` is the recursion level of `dir`, with the root at 0.
///
/// # Errors
/// Returns a `Fix: ...` message when the depth limit is exceeded, a directory or entry
/// cannot be read, an entry is a symlink, a `spec.toml` entry is not a regular file or is
/// larger than `MAX_SPEC_BYTES`, a directory collides case-insensitively with another, or a
/// directory key has already been visited.
fn walk_ops_tree(
    ops_root: &Path,
    dir: &Path,
    depth: usize,
    state: &mut WalkState,
) -> Result<(), String> {
    if depth > MAX_WALK_DEPTH {
        return Err(format!(
            "Fix: reduce op tree depth at {} to {MAX_WALK_DEPTH} or fewer path components",
            dir.display()
        ));
    }

    let listing = match fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(error) => {
            return Err(format!(
                "Fix: make every path under {} readable by the build script: {error}",
                ops_root.display()
            ));
        }
    };

    for item in listing {
        let item = match item {
            Ok(item) => item,
            Err(error) => {
                return Err(format!(
                    "Fix: make every directory entry under {} readable by the build script: {error}",
                    dir.display()
                ));
            }
        };
        let child = item.path();
        // Symlinks are rejected here, before the entry is classified.
        let child_metadata = checked_metadata(&child)?;
        reject_spec_toml_non_file(&child, &child_metadata)?;

        if child_metadata.is_dir() {
            validate_casefold_path(ops_root, &child, state)?;
            let dir_key = file_key(&child, &child_metadata)?;
            let first_visit = state.visited_dirs.insert(dir_key);
            if !first_visit {
                return Err(format!(
                    "Fix: remove directory cycle or hard-link alias at {}",
                    child.display()
                ));
            }
            walk_ops_tree(ops_root, &child, depth + 1, state)?;
            continue;
        }

        // Only regular files named exactly `spec.toml` are collected; everything else is ignored.
        let is_spec_file = child_metadata.is_file() && item.file_name() == "spec.toml";
        if !is_spec_file {
            continue;
        }
        if child_metadata.len() > MAX_SPEC_BYTES {
            return Err(format!(
                "Fix: reduce spec.toml at {} to at most {MAX_SPEC_BYTES} bytes",
                child.display()
            ));
        }
        state.spec_paths.push(child);
    }
    Ok(())
}

/// Reads metadata for `path` without following symlinks and refuses symlinks outright.
///
/// # Errors
/// Returns a `Fix: ...` message when the metadata cannot be read or when `path` itself is
/// a symlink.
fn checked_metadata(path: &Path) -> Result<Metadata, String> {
    match fs::symlink_metadata(path) {
        Err(error) => Err(format!(
            "Fix: make path readable at {}: {error}",
            path.display()
        )),
        Ok(metadata) if metadata.file_type().is_symlink() => Err(format!(
            "Fix: replace symlink at {} with a real file or directory inside the walked tree",
            path.display()
        )),
        Ok(metadata) => Ok(metadata),
    }
}

/// Fails when an entry named `spec.toml` is anything other than a regular file.
///
/// Entries with any other file name always pass, whatever their type.
fn reject_spec_toml_non_file(path: &Path, metadata: &Metadata) -> Result<(), String> {
    let named_spec_toml = path.file_name().is_some_and(|name| name == "spec.toml");
    if !named_spec_toml || metadata.is_file() {
        return Ok(());
    }
    Err(format!(
        "Fix: replace spec.toml at {} with a readable regular file",
        path.display()
    ))
}

/// Rejects a directory whose path differs from a previously seen one only by ASCII letter case.
///
/// The path is made relative to `ops_root`, converted lossily to a string, ASCII-lowercased
/// and recorded in `state.casefold_paths`. Non-ASCII characters are compared as-is.
///
/// # Errors
/// Returns a `Fix: ...` message when `path` is not under `ops_root` or when a different
/// path already produced the same lowercased key.
fn validate_casefold_path(
    ops_root: &Path,
    path: &Path,
    state: &mut WalkState,
) -> Result<(), String> {
    let relative = match path.strip_prefix(ops_root) {
        Ok(relative) => relative,
        Err(error) => {
            return Err(format!(
                "Fix: keep op path {} inside ops root {}: {error}",
                path.display(),
                ops_root.display()
            ));
        }
    };

    let folded = relative.to_string_lossy().to_ascii_lowercase();
    match state.casefold_paths.insert(folded, path.to_path_buf()) {
        Some(previous) if previous != path => Err(format!(
            "Fix: remove case-only path alias {}; it collides with {} on case-insensitive filesystems",
            path.display(),
            previous.display()
        )),
        _ => Ok(()),
    }
}

/// Builds the directory identity key on Unix from the metadata's device and inode numbers.
///
/// The path argument is unused on this platform; this variant never fails.
#[cfg(unix)]
fn file_key(_path: &Path, metadata: &Metadata) -> Result<FileKey, String> {
    let device = metadata.dev();
    let inode = metadata.ino();
    Ok((device, inode))
}

/// Builds the directory identity key off Unix by canonicalizing the path.
///
/// The metadata argument is unused on this platform.
///
/// # Errors
/// Returns a `Fix: ...` message when canonicalization fails.
#[cfg(not(unix))]
fn file_key(path: &Path, _metadata: &Metadata) -> Result<FileKey, String> {
    match path.canonicalize() {
        Ok(canonical) => Ok(canonical),
        Err(error) => Err(format!(
            "Fix: canonicalize directory at {}: {error}",
            path.display()
        )),
    }
}

/// Reads a `spec.toml` file and returns its `id` and `archetype` string fields.
///
/// The file is re-checked here (not a symlink, regular file, within `MAX_SPEC_BYTES`)
/// and the size limit is applied again to the bytes actually read.
///
/// # Errors
/// Returns a `Fix: ...` message when the path is a symlink or not a regular file, when the
/// file is too large or unreadable, when the TOML is malformed, or when either field is
/// missing or not a string.
fn load_spec_identity(spec_path: &Path) -> Result<(String, String), String> {
    // Shared by the pre-read and post-read size checks.
    let oversize = || {
        format!(
            "Fix: reduce spec.toml at {} to at most {MAX_SPEC_BYTES} bytes",
            spec_path.display()
        )
    };

    let metadata = checked_metadata(spec_path)?;
    if !metadata.is_file() {
        return Err(format!(
            "Fix: replace spec.toml at {} with a readable regular file",
            spec_path.display()
        ));
    }
    if metadata.len() > MAX_SPEC_BYTES {
        return Err(oversize());
    }

    let source = match fs::read_to_string(spec_path) {
        Ok(text) => text,
        Err(error) => {
            return Err(format!(
                "Fix: make spec.toml readable at {}: {error}",
                spec_path.display()
            ));
        }
    };
    // The file may have changed since the metadata check, so measure what was read.
    if source.len() as u64 > MAX_SPEC_BYTES {
        return Err(oversize());
    }

    let value = match source.parse::<Value>() {
        Ok(value) => value,
        Err(error) => {
            return Err(format!(
                "Fix: repair malformed TOML in {}: {error}",
                spec_path.display()
            ));
        }
    };
    let id = required_string(&value, spec_path, "id")?;
    let archetype = required_string(&value, spec_path, "archetype")?;
    Ok((String::from(id), String::from(archetype)))
}

/// Checks that `id` is non-empty and made only of ASCII lowercase letters, ASCII digits,
/// `_` and `.`.
///
/// # Errors
/// Returns a `Fix: ...` message naming the id and `spec_path` when the check fails.
fn validate_op_id(id: &str, spec_path: &Path) -> Result<(), String> {
    let is_allowed = |byte: u8| {
        byte.is_ascii_lowercase() || byte.is_ascii_digit() || byte == b'_' || byte == b'.'
    };
    if id.is_empty() || !id.bytes().all(is_allowed) {
        return Err(format!(
            "Fix: change op id `{id}` in {} to match ^[a-z0-9_.]+$ with ASCII characters only",
            spec_path.display()
        ));
    }
    Ok(())
}

/// Rejects ids starting with `internal.` or `test.` unless the opt-in variable is set.
///
/// The environment variable named by `RESERVED_ID_ENV` is consulted only for ids with a
/// reserved prefix, and only its presence matters: its value is not inspected.
///
/// # Errors
/// Returns a `Fix: ...` message naming the id and `spec_path` when a reserved prefix is
/// used without the opt-in.
fn validate_reserved_prefix(id: &str, spec_path: &Path) -> Result<(), String> {
    let uses_reserved_prefix = ["internal.", "test."]
        .iter()
        .any(|prefix| id.starts_with(*prefix));
    if !uses_reserved_prefix || env::var_os(RESERVED_ID_ENV).is_some() {
        return Ok(());
    }
    Err(format!(
        "Fix: rename reserved op id `{id}` in {} or set {RESERVED_ID_ENV}=1 as a maintainer",
        spec_path.display()
    ))
}

/// Looks up `field` in the parsed TOML `value` and returns it as a borrowed string.
///
/// # Errors
/// Returns a `Fix: ...` message naming `field` and `spec_path` when the field is absent
/// or is not a string.
fn required_string<'a>(value: &'a Value, spec_path: &Path, field: &str) -> Result<&'a str, String> {
    match value.get(field).and_then(Value::as_str) {
        Some(text) => Ok(text),
        None => Err(format!(
            "Fix: add string field `{field}` to {}",
            spec_path.display()
        )),
    }
}

/// Renders the Rust source of a `walked_ops()` function returning the given pairs.
///
/// Each `(id, archetype)` pair becomes one tuple row, with both strings written through
/// their `Debug` formatting so they appear as quoted, escaped literals.
fn render_walked_ops(specs: &[(String, String)]) -> String {
    const PROLOGUE: &str = concat!(
        "/// Operation metadata discovered from co-located spec.toml files.\n",
        "#[must_use]\n",
        "pub fn walked_ops() -> &'static [(&'static str, &'static str)] {\n",
        "    &[\n",
    );
    const EPILOGUE: &str = "    ]\n}\n";

    let rows: String = specs
        .iter()
        .map(|(id, archetype)| format!("        ({id:?}, {archetype:?}),\n"))
        .collect();

    let mut rendered = String::with_capacity(PROLOGUE.len() + rows.len() + EPILOGUE.len());
    rendered.push_str(PROLOGUE);
    rendered.push_str(&rows);
    rendered.push_str(EPILOGUE);
    rendered
}