#![allow(unsafe_code)]
pub mod allocator;
pub mod compiler;
pub mod cpu;
pub mod gpu;
pub mod registry;
#[cfg(feature = "bitnet")]
pub mod bitnet;
#[cfg(feature = "custom-hardware")]
pub mod custom;
#[cfg(feature = "wasm")]
pub mod wasm;
pub use allocator::{
AlignedMemoryAllocator, PoolConfig, PooledMemoryAllocator, SystemMemoryAllocator,
calculate_tensor_size, get_alignment_requirement, get_simd_alignment,
};
pub use compiler::{
CompilationResult, CompilationStats, FusionConfig, FusionType, KernelCompiler, MemoryConfig,
MemoryPlan, TensorInfo,
};
pub use cpu::{
CpuExecutionProvider, CpuKernel, CpuMemoryAllocator, SimdCapabilities, create_cpu_provider,
create_cpu_provider_with_config, create_numa_cpu_provider, detect_simd_capabilities,
};
pub use gpu::{
BandwidthOptimizedPlacement, CudaCompileOptions, CudaKernelManager, GpuExecutionProvider,
GpuMemoryAllocator, GpuTopology, GpuTopologyManager, LocalityAwarePlacement,
MultiGpuMemoryConfig, MultiGpuMemoryManager, PlacementPlan, PlacementStrategy,
PowerEfficientPlacement, SyncStrategy, TopologyConfig, Workload, WorkloadType,
create_gpu_provider, create_gpu_provider_with_config,
};
pub use registry::{ProviderRegistry, RegistryStatistics};
pub use ronn_core::{ExecutionProvider, ProviderType};
#[cfg(feature = "bitnet")]
pub use bitnet::{
BinaryTensor, BitNetExecutionProvider, BitNetKernel, BitNetOperation, BitNetProviderConfig,
BitNetQuantizer, QuantizationMethod, TernaryTensor, create_bitnet_provider,
};
#[cfg(feature = "custom-hardware")]
pub use custom::{
CustomHardwareProvider, CustomProviderRegistry, HardwareCapability, NpuConfig, NpuProvider,
TpuConfig, TpuProvider, create_npu_provider, create_tpu_provider,
};
#[cfg(feature = "wasm")]
pub use wasm::{WasmBridge, WasmExecutionProvider, WasmProviderConfig, create_wasm_provider};
/// Crate-wide result alias: shorthand for [`anyhow::Result`], used by the
/// provider-system factory functions in this module.
pub type Result<T> = anyhow::Result<T>;
/// Builds a [`ProviderRegistry`] containing the CPU provider and, when one can
/// be created, the GPU provider as well.
///
/// The CPU provider is mandatory. The GPU provider is best-effort: if
/// `create_gpu_provider` fails, the failure is logged at `info` level and the
/// registry is returned with the CPU provider only.
///
/// # Errors
///
/// Returns an error if the CPU provider cannot be created or registered, or if
/// a successfully created GPU provider fails to register.
pub fn create_provider_system() -> Result<ProviderRegistry> {
    let registry = ProviderRegistry::new();

    // CPU is always required; any failure here aborts construction.
    registry.register_provider(create_cpu_provider()?)?;

    // GPU is optional: bail out early with the CPU-only registry when absent.
    let gpu = match create_gpu_provider() {
        Ok(provider) => provider,
        Err(err) => {
            tracing::info!("GPU provider not available: {}, using CPU only", err);
            return Ok(registry);
        }
    };

    registry.register_provider(gpu)?;
    tracing::info!("Registered both CPU and GPU providers");
    Ok(registry)
}
/// Builds a [`ProviderRegistry`] that contains only the CPU provider.
///
/// Unlike [`create_provider_system`], no attempt is made to create a GPU
/// provider.
///
/// # Errors
///
/// Returns an error if the CPU provider cannot be created or registered.
pub fn create_cpu_only_system() -> Result<ProviderRegistry> {
    let registry = ProviderRegistry::new();
    registry.register_provider(create_cpu_provider()?)?;
    tracing::info!("Registered CPU-only provider system");
    Ok(registry)
}
/// Builds a [`ProviderRegistry`] with every provider this build supports.
///
/// Starts from [`create_provider_system`] (CPU plus optional GPU) and then
/// tries each feature-gated provider that was compiled in:
///
/// * `bitnet` — BitNet provider
/// * `wasm` — WebAssembly provider
/// * `custom-hardware` — NPU and TPU providers
///
/// A feature-gated provider that cannot be *created* is skipped after logging
/// (`warn` for BitNet/WebAssembly, `debug` for NPU/TPU). With none of these
/// features enabled the result is the same registry `create_provider_system`
/// returns.
///
/// # Errors
///
/// Returns an error if [`create_provider_system`] fails, or if a provider that
/// was created successfully then fails to *register*.
pub fn create_comprehensive_provider_system() -> Result<ProviderRegistry> {
    let registry = create_provider_system()?;

    #[cfg(feature = "bitnet")]
    {
        match create_bitnet_provider() {
            Err(err) => {
                tracing::warn!("BitNet provider registration failed: {}", err);
            }
            Ok(provider) => {
                registry.register_provider(provider)?;
                tracing::info!("Registered BitNet provider for 1-bit quantized models");
            }
        }
    }

    #[cfg(feature = "wasm")]
    {
        match create_wasm_provider() {
            Err(err) => {
                tracing::warn!("WebAssembly provider registration failed: {}", err);
            }
            Ok(provider) => {
                registry.register_provider(provider)?;
                tracing::info!("Registered WebAssembly provider for browser deployment");
            }
        }
    }

    #[cfg(feature = "custom-hardware")]
    {
        // NPU first, then TPU; each is attempted independently of the other.
        match create_npu_provider() {
            Err(err) => {
                tracing::debug!("NPU provider registration failed: {}", err);
            }
            Ok(provider) => {
                registry.register_provider(provider)?;
                tracing::info!("Registered NPU provider");
            }
        }

        match create_tpu_provider() {
            Err(err) => {
                tracing::debug!("TPU provider registration failed: {}", err);
            }
            Ok(provider) => {
                registry.register_provider(provider)?;
                tracing::info!("Registered TPU provider");
            }
        }
    }

    Ok(registry)
}
/// Creates a [`KernelCompiler`] configured with the more aggressive of the two
/// presets defined in this module.
///
/// All fusion kinds (element-wise, convolution, matmul) are enabled with a
/// maximum fusion depth of 6, and memory optimization is enabled with tensor
/// reuse, a row-major preference and `max_memory_overhead` set to `0.3`.
///
/// NOTE(review): `max_memory_overhead` is presumably a fraction of baseline
/// memory — confirm against `MemoryConfig`'s documentation.
pub fn create_performance_compiler() -> KernelCompiler {
    KernelCompiler::with_config(
        FusionConfig {
            enable_fusion: true,
            max_fusion_depth: 6,
            enable_elementwise_fusion: true,
            enable_conv_fusion: true,
            enable_matmul_fusion: true,
        },
        MemoryConfig {
            enable_optimization: true,
            prefer_row_major: true,
            enable_tensor_reuse: true,
            max_memory_overhead: 0.3,
        },
    )
}
/// Creates a [`KernelCompiler`] configured with the more conservative of the
/// two presets defined in this module.
///
/// Compared with [`create_performance_compiler`], the maximum fusion depth is
/// 3 instead of 6, convolution fusion is disabled, and `max_memory_overhead`
/// is `0.1` instead of `0.3`. Element-wise and matmul fusion, tensor reuse and
/// the row-major preference stay enabled.
///
/// NOTE(review): `max_memory_overhead` is presumably a fraction of baseline
/// memory — confirm against `MemoryConfig`'s documentation.
pub fn create_memory_optimized_compiler() -> KernelCompiler {
    KernelCompiler::with_config(
        FusionConfig {
            enable_fusion: true,
            max_fusion_depth: 3,
            enable_elementwise_fusion: true,
            enable_conv_fusion: false,
            enable_matmul_fusion: true,
        },
        MemoryConfig {
            enable_optimization: true,
            prefer_row_major: true,
            enable_tensor_reuse: true,
            max_memory_overhead: 0.1,
        },
    )
}
#[cfg(test)]
mod tests {
    use super::*;
    use ronn_core::{DataType, GraphNode, SubGraph, Tensor, TensorAllocator, TensorLayout};
    use std::collections::HashMap;

    /// Builds a subgraph with a single `Add` node that reads `input1` and
    /// `input2` and writes to `output_name`.
    fn single_add_subgraph(output_name: &str) -> SubGraph {
        SubGraph {
            nodes: vec![GraphNode {
                id: 0,
                op_type: "Add".to_string(),
                attributes: HashMap::new(),
                inputs: vec!["input1".to_string(), "input2".to_string()],
                outputs: vec![output_name.to_string()],
                name: Some("test_add".to_string()),
            }],
            edges: vec![],
            inputs: vec!["input1".to_string(), "input2".to_string()],
            outputs: vec![output_name.to_string()],
        }
    }

    /// The default system must register at least one provider (the CPU one)
    /// and report a non-empty set of supported ops and preference order.
    #[test]
    fn test_provider_system_creation() -> Result<()> {
        let registry = create_provider_system()?;
        let stats = registry.get_statistics();
        assert!(stats.provider_count >= 1);
        assert!(stats.total_supported_ops > 0);
        assert!(!stats.preference_order.is_empty());
        Ok(())
    }

    /// The CPU-only system must contain exactly one provider, and it must be
    /// the CPU provider.
    #[test]
    fn test_cpu_only_system() -> Result<()> {
        let registry = create_cpu_only_system()?;
        let stats = registry.get_statistics();
        assert_eq!(stats.provider_count, 1);
        assert_eq!(stats.preference_order.len(), 1);
        assert_eq!(stats.preference_order[0], ronn_core::ProviderId::CPU);
        Ok(())
    }

    /// Both compiler presets must compile a trivial subgraph and report at
    /// least one fused op.
    #[test]
    fn test_kernel_compiler_variants() -> Result<()> {
        let perf_compiler = create_performance_compiler();
        let memory_compiler = create_memory_optimized_compiler();
        let subgraph = single_add_subgraph("temp1");

        let perf_result = perf_compiler.compile(&subgraph)?;
        let memory_result = memory_compiler.compile(&subgraph)?;

        assert!(!perf_result.fused_ops.is_empty());
        assert!(!memory_result.fused_ops.is_empty());
        Ok(())
    }

    /// Compiles a single `Add` through the registry, executes it on two
    /// 4-element `F32` tensors of ones, and checks the kernel's statistics
    /// record exactly one execution.
    #[test]
    fn test_end_to_end_execution() -> Result<()> {
        let registry = create_cpu_only_system()?;
        let subgraph = single_add_subgraph("output1");

        let (provider_id, kernel) = registry.compile_subgraph(subgraph)?;
        assert_eq!(provider_id, ronn_core::ProviderId::CPU);

        let input1 = Tensor::ones(vec![4], DataType::F32, TensorLayout::RowMajor)?;
        let input2 = Tensor::ones(vec![4], DataType::F32, TensorLayout::RowMajor)?;
        let inputs = vec![input1, input2];

        let outputs = kernel.execute(&inputs)?;
        assert!(!outputs.is_empty());

        let stats = kernel.get_performance_stats();
        assert_eq!(stats.execution_count, 1);
        Ok(())
    }

    /// Allocates and frees a buffer through the CPU provider's allocator and
    /// checks the reported size, memory type and allocation accounting.
    #[test]
    fn test_allocator_integration() -> Result<()> {
        let registry = create_cpu_only_system()?;
        let cpu_provider = registry
            .get_provider(ronn_core::ProviderId::CPU)
            .expect("CPU provider should exist");
        let allocator = cpu_provider.get_allocator();

        let buffer = allocator.allocate(&[100], DataType::F32)?;
        // 100 F32 elements at 4 bytes each.
        assert_eq!(buffer.size, 400);
        assert_eq!(buffer.memory_type, ronn_core::MemoryType::SystemRAM);

        let memory_info = allocator.get_memory_info();
        assert!(memory_info.allocated_bytes > 0);

        allocator.deallocate(buffer)?;
        Ok(())
    }

    /// Checks the architecture-specific capability each supported target is
    /// expected to report, and prints the detected set for diagnostics.
    #[test]
    fn test_simd_detection() {
        let capabilities = detect_simd_capabilities();
        #[cfg(target_arch = "x86_64")]
        {
            assert!(capabilities.sse2);
        }
        #[cfg(target_arch = "aarch64")]
        {
            assert!(capabilities.fma);
        }
        println!("Detected SIMD capabilities: {:?}", capabilities);
    }

    /// Allocates and frees the same shape twice through the pooled allocator
    /// and checks that the reported hit rate is a valid ratio in `[0, 1]`.
    #[test]
    fn test_memory_pooling() -> Result<()> {
        let config = PoolConfig {
            max_buffers_per_bucket: 4,
            max_pool_size: 1024 * 1024,
            bucket_granularity: 64,
        };
        let allocator = PooledMemoryAllocator::new(config);

        let buffer1 = allocator.allocate(&[64], DataType::F32)?;
        allocator.deallocate(buffer1)?;

        // Same shape again — presumably eligible to be served from the pool;
        // only the hit-rate bounds are asserted here.
        let buffer2 = allocator.allocate(&[64], DataType::F32)?;
        allocator.deallocate(buffer2)?;

        let hit_rate = allocator.get_hit_rate();
        assert!((0.0..=1.0).contains(&hit_rate));
        Ok(())
    }

    /// Setting a custom preference order must be reflected by a subsequent
    /// read of the order.
    #[test]
    fn test_provider_preference_order() -> Result<()> {
        let registry = create_provider_system()?;
        let initial_order = registry.get_preference_order();
        assert!(!initial_order.is_empty());

        let custom_order = vec![ronn_core::ProviderId::CPU];
        registry.set_preference_order(custom_order.clone())?;

        let updated_order = registry.get_preference_order();
        assert_eq!(updated_order, custom_order);
        Ok(())
    }
}