//! Private module for proc macro integration.
//!
//! This module contains types that are used by the `ringkernel-derive` proc macros
//! but are not part of the public API. They are exposed for macro-generated code only.
use crate::types::KernelMode;
/// Kernel registration information collected by `#[ring_kernel]` macro.
///
/// This struct is used with the `inventory` crate to collect kernel registrations
/// at compile time. It should not be used directly by user code; instances are
/// submitted by macro-generated code and enumerated via `registered_kernels()`.
#[derive(Debug, Clone)]
pub struct KernelRegistration {
    /// Unique kernel identifier.
    pub id: &'static str,
    /// Execution mode (persistent or event-driven).
    pub mode: KernelMode,
    /// Grid size (number of blocks).
    pub grid_size: u32,
    /// Block size (threads per block).
    pub block_size: u32,
    /// Target kernels this kernel publishes to (for K2K routing).
    /// NOTE(review): presumably entries are kernel `id`s of other registrations — confirm.
    pub publishes_to: &'static [&'static str],
}
// Register the type with inventory for static collection
inventory::collect!(KernelRegistration);
/// Get all registered kernels.
///
/// Yields every `KernelRegistration` collected through `inventory`, i.e. one
/// entry per `#[ring_kernel]` expansion linked into the final binary.
pub fn registered_kernels() -> impl Iterator<Item = &'static KernelRegistration> {
    inventory::iter::<KernelRegistration>()
}
/// Stencil kernel registration information collected by `#[stencil_kernel]` macro.
///
/// This struct stores the generated CUDA source code and stencil configuration
/// for runtime compilation and execution. Instances are submitted by
/// macro-generated code and looked up via `find_stencil_kernel()`.
#[derive(Debug, Clone)]
pub struct StencilKernelRegistration {
    /// Unique kernel identifier.
    pub id: &'static str,
    /// Grid dimensionality ("1d", "2d", or "3d").
    pub grid: &'static str,
    /// Tile width (block X dimension).
    pub tile_width: u32,
    /// Tile height (block Y dimension).
    pub tile_height: u32,
    /// Halo/ghost cell width (stencil radius).
    pub halo: u32,
    /// Generated CUDA C source code, compiled at runtime (e.g. via NVRTC —
    /// NOTE(review): confirm the actual runtime-compilation path).
    pub cuda_source: &'static str,
}
// Register stencil kernels with inventory
inventory::collect!(StencilKernelRegistration);
/// Get all registered stencil kernels.
///
/// Yields every `StencilKernelRegistration` collected through `inventory`.
pub fn registered_stencil_kernels() -> impl Iterator<Item = &'static StencilKernelRegistration> {
    inventory::iter::<StencilKernelRegistration>()
}
/// Find a stencil kernel registration by ID.
///
/// Returns `None` when no registered stencil kernel has a matching `id`.
pub fn find_stencil_kernel(id: &str) -> Option<&'static StencilKernelRegistration> {
    for registration in registered_stencil_kernels() {
        if registration.id == id {
            return Some(registration);
        }
    }
    None
}
/// GPU kernel registration for multi-backend kernels collected by `#[gpu_kernel]` macro.
///
/// This struct stores backend-independent kernel metadata and capability requirements.
/// The actual backend-specific source code is stored in constants generated by the macro.
/// Instances are submitted by macro-generated code and looked up via `find_gpu_kernel()`.
#[derive(Debug, Clone)]
pub struct GpuKernelRegistration {
    /// Unique kernel identifier.
    pub id: &'static str,
    /// Block/workgroup size.
    pub block_size: u32,
    /// Required GPU capabilities (e.g., "f64", "atomic64", "subgroups").
    pub capabilities: &'static [&'static str],
    /// Compatible backends that support all required capabilities.
    pub backends: &'static [&'static str],
    /// Fallback order for runtime backend selection; consumed by `select_backend()`.
    pub fallback_order: &'static [&'static str],
}
// Register GPU kernels with inventory
inventory::collect!(GpuKernelRegistration);
/// Get all registered GPU kernels.
///
/// Yields every `GpuKernelRegistration` collected through `inventory`.
pub fn registered_gpu_kernels() -> impl Iterator<Item = &'static GpuKernelRegistration> {
    inventory::iter::<GpuKernelRegistration>()
}
/// Find a GPU kernel registration by ID.
///
/// Returns `None` when no registered GPU kernel has a matching `id`.
pub fn find_gpu_kernel(id: &str) -> Option<&'static GpuKernelRegistration> {
    for registration in registered_gpu_kernels() {
        if registration.id == id {
            return Some(registration);
        }
    }
    None
}
/// Check if a backend supports a specific capability.
///
/// Capability names are free-form strings (e.g. "f64", "atomic64"); any
/// capability not explicitly denied for a known backend is assumed supported.
/// Unknown backends support nothing.
pub fn backend_supports_capability(backend: &str, capability: &str) -> bool {
    match backend {
        // CUDA and CPU (emulation) support everything.
        "cuda" | "cpu" => true,
        // Metal: no f64, cooperative groups, or dynamic parallelism.
        "metal" => !matches!(
            capability,
            "f64" | "cooperative_groups" | "dynamic_parallelism"
        ),
        // WebGPU: no 64-bit types (atomic64 is emulated only), cooperative
        // groups, or dynamic parallelism.
        "wgpu" => !matches!(
            capability,
            "f64" | "i64" | "atomic64" | "cooperative_groups" | "dynamic_parallelism"
        ),
        // Unknown backend.
        _ => false,
    }
}
/// Select the best backend from fallback order that supports all capabilities.
///
/// Walks `fallback_order` in priority order and returns the first backend that
/// is present in `available_backends` and supports every capability in
/// `required_capabilities` (per [`backend_supports_capability`]).
///
/// Returns the canonical `'static` name of the selected backend, or `None`
/// when no backend qualifies.
pub fn select_backend(
    fallback_order: &[&str],
    required_capabilities: &[&str],
    available_backends: &[&str],
) -> Option<&'static str> {
    for backend in fallback_order {
        // Skip backends that are not available at runtime.
        if !available_backends.contains(backend) {
            continue;
        }
        // Skip backends missing any required capability.
        let supports_all = required_capabilities
            .iter()
            .all(|cap| backend_supports_capability(backend, cap));
        if !supports_all {
            continue;
        }
        // Map the borrowed name to its canonical 'static string. An unknown
        // name can only reach this point when `required_capabilities` is empty
        // (unknown backends support no capabilities); previously such a name
        // aborted the whole search by returning `None` — now it is skipped so
        // later entries in the fallback order are still considered.
        match *backend {
            "cuda" => return Some("cuda"),
            "metal" => return Some("metal"),
            "wgpu" => return Some("wgpu"),
            "cpu" => return Some("cpu"),
            _ => continue,
        }
    }
    None
}