//! Neural integration layer: bridges the CUDA-Rust transpiler with
//! GPU-accelerated neural network operations, providing automatic CPU
//! fallback, memory management, and performance monitoring.

pub mod bridge;
pub mod cuda_kernels;
pub mod gpu_neural_ops;
pub mod memory_manager;
pub mod performance_monitor;
pub mod wasm_bindings;
pub mod wasm_types;
pub mod examples;
pub mod benchmarks;

use crate::CudaRust;
use std::sync::Arc;
use std::marker::PhantomData;
use thiserror::Error;
/// Errors produced by the neural integration layer.
#[derive(Error, Debug)]
pub enum NeuralIntegrationError {
    #[error("CUDA transpilation failed: {0}")]
    TranspilationError(String),

    #[error("GPU initialization failed: {0}")]
    GpuInitError(String),

    #[error("Memory allocation failed: {0}")]
    MemoryError(String),

    #[error("Neural operation failed: {0}")]
    OperationError(String),

    #[error("Performance degradation detected: {0}")]
    PerformanceError(String),

    #[error("Type conversion error: {0}")]
    TypeError(String),
}

/// Result alias used throughout the neural integration modules.
pub type NeuralResult<T> = std::result::Result<T, NeuralIntegrationError>;
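
/// Main bridge between the CUDA-Rust transpiler and neural network
/// operations. Holds the GPU backend (if one initialized), the memory
/// manager, and the performance monitor.
///
/// A minimal usage sketch, marked `ignore` because it requires a working
/// backend at runtime and the exact input layout per operation is defined
/// by `gpu_neural_ops` and the CPU fallback:
///
/// ```ignore
/// use std::marker::PhantomData;
///
/// let bridge = NeuralBridge::new()?;
/// let op = NeuralOperation::VectorAdd { size: 4, _phantom: PhantomData };
/// let outputs = bridge.execute_neural_operation(op, &[1.0f32, 2.0, 3.0, 4.0])?;
/// ```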
pub struct NeuralBridge {
    cuda_transpiler: CudaRust,
    gpu_backend: Option<Arc<dyn GpuBackendTrait>>,
    memory_manager: Arc<dyn MemoryManagerTrait>,
    performance_monitor: Arc<dyn PerformanceMonitorTrait>,
    config: BridgeConfig,
}
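
/// Configuration for the neural bridge.
///
/// A sketch of overriding selected defaults via struct-update syntax
/// (marked `ignore`; the precise semantics of each field are defined by
/// the backend implementations):
///
/// ```ignore
/// let config = BridgeConfig {
///     precision: Precision::Float16,
///     batch_size: 64,
///     ..BridgeConfig::default()
/// };
/// let bridge = NeuralBridge::with_config(config)?;
/// ```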
#[derive(Debug, Clone)]
pub struct BridgeConfig {
    /// Enable GPU acceleration
    pub enable_gpu: bool,
    /// Preferred GPU device class
    pub gpu_device: GpuDevice,
    /// Size of the memory pool (default 512)
    pub memory_pool_size: usize,
    /// Enable performance monitoring
    pub enable_monitoring: bool,
    /// Fall back to CPU execution when GPU paths fail
    pub auto_fallback: bool,
    /// Batch size for batched operations
    pub batch_size: usize,
    /// Floating-point precision for operations
    pub precision: Precision,
}
#[derive(Debug, Clone, Copy)]
pub enum GpuDevice {
    Auto,
    HighPerformance,
    LowPower,
    Discrete,
    Integrated,
}

#[derive(Debug, Clone, Copy)]
pub enum Precision {
    Float16,
    Float32,
    Float64,
}
/// Abstraction over a GPU backend that can create buffers and execute
/// compiled kernels.
pub trait GpuBackendTrait: Send + Sync {
    fn initialize(&self) -> NeuralResult<()>;
    fn is_available(&self) -> bool;
    fn get_device_info(&self) -> DeviceInfo;
    fn create_buffer(&self, size: usize) -> NeuralResult<BufferHandle>;
    fn execute_kernel(&self, kernel: &CompiledKernel, inputs: &[BufferHandle]) -> NeuralResult<BufferHandle>;
}

/// Manages allocations and host/device transfers.
pub trait MemoryManagerTrait: Send + Sync {
    fn allocate(&self, size: usize) -> NeuralResult<MemoryHandle>;
    fn deallocate(&self, handle: MemoryHandle) -> NeuralResult<()>;
    fn transfer_to_gpu(&self, data: &[f32]) -> NeuralResult<BufferHandle>;
    fn transfer_from_gpu(&self, buffer: BufferHandle) -> NeuralResult<Vec<f32>>;
    fn get_memory_stats(&self) -> MemoryStats;
}

/// Records per-operation timings and aggregate performance statistics.
pub trait PerformanceMonitorTrait: Send + Sync {
    fn start_operation(&self, name: &str) -> OperationHandle;
    fn end_operation(&self, handle: OperationHandle) -> NeuralResult<OperationStats>;
    fn get_performance_summary(&self) -> PerformanceStats;
    fn detect_degradation(&self) -> Option<PerformanceDegradation>;
}
#[derive(Debug, Clone)]
pub struct DeviceInfo {
    pub name: String,
    pub vendor: String,
    pub device_type: String,
    pub memory_size: usize,
    pub compute_units: u32,
    pub max_workgroup_size: u32,
    pub supports_f16: bool,
    pub supports_f64: bool,
}

#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct BufferHandle(u64);

#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct MemoryHandle(u64);

#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct OperationHandle(u64);
/// A kernel transpiled to WGSL, ready for dispatch by the GPU backend.
#[derive(Debug, Clone)]
pub struct CompiledKernel {
    pub name: String,
    pub wgsl_source: String,
    pub entry_point: String,
    pub workgroup_size: [u32; 3],
    pub bind_group_layout: Vec<BindingType>,
}

#[derive(Debug, Clone)]
pub enum BindingType {
    Buffer { read_only: bool },
    UniformBuffer,
    StorageTexture,
}
#[derive(Debug, Clone)]
pub struct MemoryStats {
    pub total_allocated: usize,
    pub gpu_allocated: usize,
    pub cpu_allocated: usize,
    pub peak_usage: usize,
    pub allocations: u64,
    pub deallocations: u64,
}

#[derive(Debug, Clone)]
pub struct PerformanceStats {
    pub total_operations: u64,
    pub average_execution_time: f64,
    pub gpu_utilization: f32,
    pub memory_bandwidth: f64,
    pub throughput: f64,
}

#[derive(Debug, Clone)]
pub struct OperationStats {
    pub name: String,
    pub execution_time: f64,
    pub gpu_time: f64,
    pub memory_transfer_time: f64,
    pub throughput: f64,
}

#[derive(Debug, Clone)]
pub struct PerformanceDegradation {
    pub operation: String,
    pub expected_time: f64,
    pub actual_time: f64,
    pub degradation_factor: f64,
    pub suggested_action: String,
}
impl Default for BridgeConfig {
    fn default() -> Self {
        Self {
            enable_gpu: true,
            gpu_device: GpuDevice::Auto,
            memory_pool_size: 512,
            enable_monitoring: true,
            auto_fallback: true,
            batch_size: 32,
            precision: Precision::Float32,
        }
    }
}
impl NeuralBridge {
    /// Create a new neural bridge with the default configuration.
    pub fn new() -> NeuralResult<Self> {
        Self::with_config(BridgeConfig::default())
    }

    /// Create a new neural bridge with a custom configuration.
    pub fn with_config(config: BridgeConfig) -> NeuralResult<Self> {
        let cuda_transpiler = CudaRust::new();

        let gpu_backend = if config.enable_gpu {
            match bridge::WebGpuBackend::new(&config) {
                Ok(backend) => Some(Arc::new(backend) as Arc<dyn GpuBackendTrait>),
                Err(e) => {
                    if config.auto_fallback {
                        log::warn!("GPU initialization failed, falling back to CPU: {e}");
                        None
                    } else {
                        return Err(NeuralIntegrationError::GpuInitError(e.to_string()));
                    }
                }
            }
        } else {
            None
        };

        let memory_manager = Arc::new(memory_manager::HybridMemoryManager::new(&config)?);

        let performance_monitor: Arc<dyn PerformanceMonitorTrait> = if config.enable_monitoring {
            Arc::new(performance_monitor::RealTimeMonitor::new()?)
        } else {
            Arc::new(performance_monitor::NoOpMonitor::new())
        };

        Ok(Self {
            cuda_transpiler,
            gpu_backend,
            memory_manager,
            performance_monitor,
            config,
        })
    }

    /// Whether a GPU backend was initialized and reports itself available.
    pub fn is_gpu_available(&self) -> bool {
        self.gpu_backend.as_ref().is_some_and(|b| b.is_available())
    }

    /// Device information for the active GPU backend, if any.
    pub fn get_device_info(&self) -> Option<DeviceInfo> {
        self.gpu_backend.as_ref().map(|b| b.get_device_info())
    }
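
    /// Transpile CUDA kernel source into a WGSL [`CompiledKernel`].
    ///
    /// A sketch, marked `ignore`: the subset of CUDA that is accepted is
    /// determined by the `CudaRust` transpiler, and this kernel is purely
    /// illustrative.
    ///
    /// ```ignore
    /// let kernel = bridge.transpile_cuda_kernel(r#"
    ///     __global__ void scale(float* data, float factor) {
    ///         int i = blockIdx.x * blockDim.x + threadIdx.x;
    ///         data[i] *= factor;
    ///     }
    /// "#)?;
    /// println!("entry point: {}", kernel.entry_point);
    /// ```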
    pub fn transpile_cuda_kernel(&self, cuda_source: &str) -> NeuralResult<CompiledKernel> {
        let rust_code = self.cuda_transpiler
            .transpile(cuda_source)
            .map_err(|e| NeuralIntegrationError::TranspilationError(e.to_string()))?;

        bridge::extract_wgsl_from_rust(&rust_code)
    }
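
    /// Execute a neural operation, preferring the GPU backend and falling
    /// back to the CPU when `auto_fallback` is set. Timings are recorded
    /// through the performance monitor.
    ///
    /// A usage sketch, marked `ignore` because the expected input layout of
    /// each operation is defined by `gpu_neural_ops` and the CPU fallback:
    ///
    /// ```ignore
    /// use std::marker::PhantomData;
    ///
    /// let op = NeuralOperation::ActivationFunction {
    ///     function: ActivationFunction::ReLU,
    ///     size: 4,
    ///     _phantom: PhantomData,
    /// };
    /// let outputs = bridge.execute_neural_operation(op, &[-1.0f32, 0.5, -0.25, 2.0])?;
    /// ```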
    pub fn execute_neural_operation<T>(
        &self,
        operation: NeuralOperation<T>,
        inputs: &[T],
    ) -> NeuralResult<Vec<T>>
    where
        T: Clone + Send + Sync + 'static + bytemuck::Pod + num_traits::Float,
    {
        let handle = self.performance_monitor.start_operation(&operation.name());

        let result = if let Some(ref backend) = self.gpu_backend {
            match self.execute_on_gpu(operation.clone(), inputs, backend) {
                Ok(result) => result,
                Err(e) => {
                    if self.config.auto_fallback {
                        log::warn!("GPU execution failed, falling back to CPU: {e}");
                        self.execute_on_cpu(operation, inputs)?
                    } else {
                        return Err(e);
                    }
                }
            }
        } else {
            self.execute_on_cpu(operation, inputs)?
        };

        let stats = self.performance_monitor.end_operation(handle)?;

        if let Some(degradation) = self.performance_monitor.detect_degradation() {
            log::warn!("Performance degradation detected: {}", degradation.suggested_action);
        }

        log::debug!("Operation {} completed in {:.2}ms", stats.name, stats.execution_time * 1000.0);

        Ok(result)
    }
    fn execute_on_gpu<T>(
        &self,
        operation: NeuralOperation<T>,
        inputs: &[T],
        backend: &Arc<dyn GpuBackendTrait>,
    ) -> NeuralResult<Vec<T>>
    where
        T: Clone + Send + Sync + 'static + bytemuck::Pod,
    {
        gpu_neural_ops::execute_operation(operation, inputs, backend, &self.memory_manager)
    }

    fn execute_on_cpu<T>(
        &self,
        operation: NeuralOperation<T>,
        inputs: &[T],
    ) -> NeuralResult<Vec<T>>
    where
        T: Clone + Send + Sync + 'static + num_traits::Float,
    {
        bridge::execute_cpu_fallback(operation, inputs)
    }

    /// Current memory usage statistics.
    pub fn get_memory_stats(&self) -> MemoryStats {
        self.memory_manager.get_memory_stats()
    }

    /// Aggregate performance statistics.
    pub fn get_performance_stats(&self) -> PerformanceStats {
        self.performance_monitor.get_performance_summary()
    }

    /// Create a batch processor that shares this bridge's backend and
    /// memory manager.
    pub fn create_batch_processor(&self) -> BatchProcessor {
        BatchProcessor::new(
            self.gpu_backend.clone(),
            self.memory_manager.clone(),
            self.config.batch_size,
        )
    }
}
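
/// Neural operations that can be dispatched through the bridge. `Custom`
/// carries raw CUDA source that is transpiled on demand.
///
/// A pure-logic sketch of constructing an operation (marked `ignore`; the
/// kernel body is a placeholder):
///
/// ```ignore
/// use std::marker::PhantomData;
///
/// let op: NeuralOperation<f32> = NeuralOperation::Custom {
///     kernel_source: "__global__ void noop() {}".to_string(),
///     name: "noop".to_string(),
///     _phantom: PhantomData,
/// };
/// assert_eq!(op.name(), "noop");
/// ```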
#[derive(Debug, Clone)]
pub enum NeuralOperation<T> {
    MatrixMultiply { a_rows: usize, a_cols: usize, b_cols: usize, _phantom: PhantomData<T> },
    VectorAdd { size: usize, _phantom: PhantomData<T> },
    ActivationFunction { function: ActivationFunction, size: usize, _phantom: PhantomData<T> },
    Convolution { channels: usize, kernel_size: usize, stride: usize, _phantom: PhantomData<T> },
    ForwardPropagation { layer_sizes: Vec<usize>, _phantom: PhantomData<T> },
    BackwardPropagation { layer_sizes: Vec<usize>, _phantom: PhantomData<T> },
    Custom { kernel_source: String, name: String, _phantom: PhantomData<T> },
}

impl<T> NeuralOperation<T> {
    /// Name used for performance monitoring and logging.
    pub fn name(&self) -> String {
        match self {
            Self::MatrixMultiply { .. } => "matrix_multiply".to_string(),
            Self::VectorAdd { .. } => "vector_add".to_string(),
            Self::ActivationFunction { function, .. } => format!("activation_{function:?}"),
            Self::Convolution { .. } => "convolution".to_string(),
            Self::ForwardPropagation { .. } => "forward_propagation".to_string(),
            Self::BackwardPropagation { .. } => "backward_propagation".to_string(),
            Self::Custom { name, .. } => name.clone(),
        }
    }
}
#[derive(Debug, Clone, Copy)]
pub enum ActivationFunction {
    Sigmoid,
    ReLU,
    Tanh,
    LeakyReLU,
    Swish,
    GELU,
}
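
/// Processes multiple operations with their inputs in batches. Usually
/// obtained from [`NeuralBridge::create_batch_processor`].
///
/// A sketch, marked `ignore`; `input_a` and `input_b` are placeholder
/// input vectors, and the batching strategy lives in `gpu_neural_ops`:
///
/// ```ignore
/// use std::marker::PhantomData;
///
/// let processor = bridge.create_batch_processor();
/// let ops = vec![
///     NeuralOperation::VectorAdd { size: 4, _phantom: PhantomData },
///     NeuralOperation::VectorAdd { size: 4, _phantom: PhantomData },
/// ];
/// let results = processor.process_batch(ops, vec![input_a, input_b])?;
/// ```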
pub struct BatchProcessor {
    gpu_backend: Option<Arc<dyn GpuBackendTrait>>,
    memory_manager: Arc<dyn MemoryManagerTrait>,
    batch_size: usize,
}

impl BatchProcessor {
    pub fn new(
        gpu_backend: Option<Arc<dyn GpuBackendTrait>>,
        memory_manager: Arc<dyn MemoryManagerTrait>,
        batch_size: usize,
    ) -> Self {
        Self {
            gpu_backend,
            memory_manager,
            batch_size,
        }
    }

    pub fn process_batch<T>(&self, operations: Vec<NeuralOperation<T>>, inputs: Vec<Vec<T>>) -> NeuralResult<Vec<Vec<T>>>
    where
        T: Clone + Send + Sync + 'static + bytemuck::Pod + num_traits::Float,
    {
        gpu_neural_ops::process_batch(operations, inputs, &self.gpu_backend, &self.memory_manager, self.batch_size)
    }
}
pub use bridge::{WebGpuBackend, extract_wgsl_from_rust, execute_cpu_fallback};
pub use cuda_kernels::*;
pub use gpu_neural_ops::{execute_operation, process_batch};
pub use memory_manager::HybridMemoryManager;
pub use performance_monitor::{RealTimeMonitor, NoOpMonitor};
pub use wasm_bindings::*;
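
/// Initialize logging and panic handling for the neural integration system.
/// On wasm32 this installs `console_error_panic_hook` and `wasm_logger`;
/// elsewhere it initializes `env_logger`. Call once at startup; a sketch
/// (marked `ignore`):
///
/// ```ignore
/// initialize().expect("failed to initialize neural integration");
/// ```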
pub fn initialize() -> NeuralResult<()> {
    #[cfg(target_arch = "wasm32")]
    {
        console_error_panic_hook::set_once();
        wasm_logger::init(wasm_logger::Config::default());
    }

    #[cfg(not(target_arch = "wasm32"))]
    {
        env_logger::init();
    }

    log::info!("Neural integration system initialized");
    Ok(())
}
/// Report what this build of the neural integration system supports.
pub fn get_capabilities() -> SystemCapabilities {
    SystemCapabilities {
        cuda_transpilation: true,
        gpu_acceleration: cfg!(any(feature = "gpu", feature = "webgpu")),
        wasm_support: cfg!(target_arch = "wasm32"),
        performance_monitoring: true,
        memory_pooling: true,
        auto_fallback: true,
        batch_processing: true,
        precision_f16: true,
        precision_f32: true,
        precision_f64: cfg!(not(target_arch = "wasm32")),
    }
}

/// Capabilities supported by the current build and target.
#[derive(Debug, Clone)]
pub struct SystemCapabilities {
    pub cuda_transpilation: bool,
    pub gpu_acceleration: bool,
    pub wasm_support: bool,
    pub performance_monitoring: bool,
    pub memory_pooling: bool,
    pub auto_fallback: bool,
    pub batch_processing: bool,
    pub precision_f16: bool,
    pub precision_f32: bool,
    pub precision_f64: bool,
}

impl Default for NeuralBridge {
    fn default() -> Self {
        // Panics on construction failure; prefer `NeuralBridge::new()` when
        // the error needs to be handled.
        Self::new().expect("Failed to create default neural bridge")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_bridge_creation() {
        let bridge = NeuralBridge::new();
        assert!(bridge.is_ok());
    }

    #[test]
    fn test_capabilities() {
        let capabilities = get_capabilities();
        assert!(capabilities.cuda_transpilation);
        assert!(capabilities.performance_monitoring);
    }

    #[test]
    fn test_config_default() {
        let config = BridgeConfig::default();
        assert_eq!(config.batch_size, 32);
        assert_eq!(config.memory_pool_size, 512);
        assert!(config.enable_gpu);
        assert!(config.auto_fallback);
    }
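
    // Pure-logic check of NeuralOperation::name(); no GPU backend or
    // sibling modules are needed, so this should run anywhere the crate
    // compiles.
    #[test]
    fn test_operation_names() {
        use std::marker::PhantomData;

        let add = NeuralOperation::<f32>::VectorAdd { size: 4, _phantom: PhantomData };
        assert_eq!(add.name(), "vector_add");

        let act = NeuralOperation::<f32>::ActivationFunction {
            function: ActivationFunction::ReLU,
            size: 4,
            _phantom: PhantomData,
        };
        assert_eq!(act.name(), "activation_ReLU");
    }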
}