#![allow(dead_code)]

use crate::error::{BackendError, BackendResult};
use crate::{Device, MemoryManager};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use torsh_core::device::DeviceType;

#[cfg(feature = "cuda")]
use crate::cuda::CudaDevice as SciRs2CudaDevice;

/// Stub of the `scirs2` CUDA memory API used until real bindings land; every
/// operation currently reports that CUDA support is unavailable.
#[cfg(feature = "cuda")]
mod scirs2_cuda {
    #[derive(Debug)]
    pub struct MockCudaDevice {
        id: usize,
    }

    pub mod memory {
        pub enum MemoryAdvice {
            SetPreferredLocation(u32),
            SetAccessedBy(u32),
            SetReadMostly,
            UnsetReadMostly,
        }

        pub async fn prefetch_async(
            _device: &crate::cuda::CudaDevice,
            _ptr: *const u8,
            _size: usize,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }

        pub async fn set_advice(
            _ptr: *const u8,
            _size: usize,
            _advice: MemoryAdvice,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }

        pub async fn copy_peer_to_peer(
            _src: *const u8,
            _dst: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }

        pub async fn copy_host_to_device_async(
            _src: *const u8,
            _dst: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }

        pub async fn copy_device_to_host_async(
            _src: *const u8,
            _dst: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }

        pub fn copy_host_to_device(
            _src: *const u8,
            _dst: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }

        pub fn copy_device_to_host(
            _src: *const u8,
            _dst: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Err("CUDA not available".to_string())
        }
    }

    pub fn synchronize(_device: &crate::cuda::CudaDevice) -> Result<(), String> {
        Err("CUDA not available".to_string())
    }
}

#[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
use crate::metal::MetalDevice as SciRs2MetalDevice;

/// Stub of the `scirs2` Metal memory API; the copy operations are currently
/// no-ops that report success.
#[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
mod scirs2_metal {
    pub mod memory {
        use crate::metal::device::MetalDevice;

        pub enum CpuCacheMode {
            WriteCombined,
        }

        pub fn set_cpu_cache_mode(
            _device: &MetalDevice,
            _ptr: *mut u8,
            _mode: CpuCacheMode,
        ) -> Result<(), String> {
            Ok(())
        }

        pub async fn copy_host_to_device_async(
            _device: &MetalDevice,
            _src_ptr: *const u8,
            _dst_ptr: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Ok(())
        }

        pub async fn copy_device_to_host_async(
            _device: &MetalDevice,
            _src_ptr: *const u8,
            _dst_ptr: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Ok(())
        }

        pub fn copy_host_to_device(
            _device: &MetalDevice,
            _src_ptr: *const u8,
            _dst_ptr: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Ok(())
        }

        pub fn copy_device_to_host(
            _device: &MetalDevice,
            _src_ptr: *const u8,
            _dst_ptr: *mut u8,
            _size: usize,
        ) -> Result<(), String> {
            Ok(())
        }
    }

    pub fn synchronize(_device: &crate::metal::device::MetalDevice) -> Result<(), String> {
        Ok(())
    }
}

/// How a transfer is carried out.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferMode {
    /// Blocking copy that completes before returning.
    Synchronous,
    /// Non-blocking copy that can overlap with other work.
    Asynchronous,
    /// Large transfer split into chunks that are pipelined.
    Streaming,
    /// Direct device-to-device copy over a peer link.
    PeerToPeer,
}

/// Direction of a transfer relative to host and device memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransferDirection {
    HostToDevice,
    DeviceToHost,
    DeviceToDevice,
    CrossDevice,
}

/// Zero-copy features a device advertises.
#[derive(Debug, Clone, Copy)]
pub struct ZeroCopyCapabilities {
    pub unified_memory: bool,
    pub peer_to_peer: bool,
    pub memory_mapping: bool,
    pub direct_gpu_access: bool,
    pub pinned_memory: bool,
    pub memory_advice: bool,
    pub async_transfers: bool,
    pub streaming_transfers: bool,
}

impl Default for ZeroCopyCapabilities {
    fn default() -> Self {
        Self {
            unified_memory: false,
            peer_to_peer: false,
            memory_mapping: false,
            direct_gpu_access: false,
            pinned_memory: false,
            memory_advice: false,
            async_transfers: false,
            streaming_transfers: false,
        }
    }
}

impl ZeroCopyCapabilities {
    /// True if any capability that enables zero-copy on its own is present.
    /// `memory_advice` and `streaming_transfers` only refine other mechanisms,
    /// so they are not counted here.
    pub fn has_any_capabilities(&self) -> bool {
        self.unified_memory
            || self.peer_to_peer
            || self.memory_mapping
            || self.direct_gpu_access
            || self.pinned_memory
            || self.async_transfers
    }

    /// Fraction of the eight capability flags that are set, in `[0.0, 1.0]`.
    pub fn capability_score(&self) -> f32 {
        let flags = [
            self.unified_memory,
            self.peer_to_peer,
            self.memory_mapping,
            self.direct_gpu_access,
            self.pinned_memory,
            self.memory_advice,
            self.async_transfers,
            self.streaming_transfers,
        ];
        flags.iter().filter(|&&flag| flag).count() as f32 / flags.len() as f32
    }

    /// Best transfer mode these capabilities support, preferring streaming,
    /// then asynchronous, then peer-to-peer, falling back to synchronous.
    pub fn recommended_transfer_mode(&self) -> TransferMode {
        if self.streaming_transfers {
            TransferMode::Streaming
        } else if self.async_transfers {
            TransferMode::Asynchronous
        } else if self.peer_to_peer {
            TransferMode::PeerToPeer
        } else {
            TransferMode::Synchronous
        }
    }
}
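
// Illustrative sketch (not part of the original API): driving mode selection
// from detected capabilities. With only `unified_memory` and `async_transfers`
// set, the score is 2/8 = 0.25 and the recommended mode is `Asynchronous`,
// since streaming support is absent.
fn example_capability_driven_mode() -> TransferMode {
    let caps = ZeroCopyCapabilities {
        unified_memory: true,
        async_transfers: true,
        ..Default::default()
    };
    debug_assert!(caps.has_any_capabilities());
    debug_assert!((caps.capability_score() - 0.25).abs() < f32::EPSILON);
    caps.recommended_transfer_mode()
}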

/// A planned transfer between two memory locations, described by raw pointers.
#[derive(Debug, Clone)]
pub struct ZeroCopyTransfer {
    pub source_device: Device,
    pub destination_device: Device,
    pub direction: TransferDirection,
    pub mode: TransferMode,
    pub source_ptr: *mut u8,
    pub destination_ptr: *mut u8,
    pub size: usize,
    pub alignment: usize,
    pub priority: u32,
    pub stream_id: Option<u64>,
}

// SAFETY: the raw pointers are treated as opaque handles; callers must ensure
// they stay valid and are not mutably aliased while a transfer is in flight.
unsafe impl Send for ZeroCopyTransfer {}
unsafe impl Sync for ZeroCopyTransfer {}

impl ZeroCopyTransfer {
    /// Builds a transfer and infers its direction from the device types.
    pub fn new(
        source_device: Device,
        destination_device: Device,
        source_ptr: *mut u8,
        destination_ptr: *mut u8,
        size: usize,
    ) -> Self {
        let direction = if source_device.device_type() == DeviceType::Cpu
            && destination_device.device_type() != DeviceType::Cpu
        {
            TransferDirection::HostToDevice
        } else if source_device.device_type() != DeviceType::Cpu
            && destination_device.device_type() == DeviceType::Cpu
        {
            TransferDirection::DeviceToHost
        } else if source_device.id() == destination_device.id() {
            TransferDirection::DeviceToDevice
        } else {
            TransferDirection::CrossDevice
        };

        Self {
            source_device,
            destination_device,
            direction,
            mode: TransferMode::Synchronous,
            source_ptr,
            destination_ptr,
            size,
            alignment: 1,
            priority: 1,
            stream_id: None,
        }
    }

    pub fn with_mode(mut self, mode: TransferMode) -> Self {
        self.mode = mode;
        self
    }

    pub fn with_alignment(mut self, alignment: usize) -> Self {
        self.alignment = alignment;
        self
    }

    pub fn with_priority(mut self, priority: u32) -> Self {
        self.priority = priority;
        self
    }

    pub fn with_stream(mut self, stream_id: u64) -> Self {
        self.stream_id = Some(stream_id);
        self
    }

    /// Whether the given capabilities allow this transfer without a staging copy.
    pub fn is_zero_copy_possible(&self, capabilities: &ZeroCopyCapabilities) -> bool {
        match self.direction {
            TransferDirection::HostToDevice | TransferDirection::DeviceToHost => {
                capabilities.unified_memory || capabilities.pinned_memory
            }
            TransferDirection::DeviceToDevice => capabilities.memory_mapping,
            TransferDirection::CrossDevice => capabilities.peer_to_peer,
        }
    }

    /// Nominal bandwidth estimate in bytes per second. The figures are rough
    /// planning numbers, not measurements; alignment below 256 bytes roughly
    /// halves the assumed host-device throughput.
    pub fn estimate_bandwidth(&self, device_type: DeviceType) -> u64 {
        match (device_type, self.direction) {
            (DeviceType::Cuda(_), TransferDirection::HostToDevice) => {
                if self.alignment >= 256 {
                    25_000_000_000
                } else {
                    12_000_000_000
                }
            }
            (DeviceType::Cuda(_), TransferDirection::DeviceToHost) => {
                if self.alignment >= 256 {
                    20_000_000_000
                } else {
                    10_000_000_000
                }
            }
            (DeviceType::Cuda(_), TransferDirection::CrossDevice) => 50_000_000_000,
            (DeviceType::Metal(_), TransferDirection::HostToDevice) => 40_000_000_000,
            (DeviceType::Metal(_), TransferDirection::DeviceToHost) => 40_000_000_000,
            (DeviceType::Wgpu(_), TransferDirection::HostToDevice) => 8_000_000_000,
            (DeviceType::Wgpu(_), TransferDirection::DeviceToHost) => 6_000_000_000,
            (DeviceType::Cpu, _) => 50_000_000_000,
            _ => 1_000_000_000,
        }
    }

    /// Estimated duration in microseconds: `size * 1_000_000 / bandwidth`.
    pub fn estimate_transfer_time_us(&self, device_type: DeviceType) -> u64 {
        let bandwidth = self.estimate_bandwidth(device_type);
        if bandwidth == 0 {
            u64::MAX
        } else {
            (self.size as u64 * 1_000_000) / bandwidth
        }
    }
}
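
// Illustrative sketch with hypothetical devices and sizes: estimating the time
// for a 256 MiB aligned host-to-device copy. At the assumed 25 GB/s CUDA
// pinned-path bandwidth this yields 268_435_456 * 1_000_000 / 25_000_000_000,
// about 10_737 microseconds.
fn example_estimate_h2d_time(cpu: Device, gpu: Device) -> u64 {
    let transfer = ZeroCopyTransfer::new(
        cpu,
        gpu,
        std::ptr::null_mut(),
        std::ptr::null_mut(),
        256 * 1024 * 1024,
    )
    .with_alignment(256);
    transfer.estimate_transfer_time_us(DeviceType::Cuda(0))
}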

/// Running counters for transfers routed through the manager.
#[derive(Debug, Default, Clone)]
pub struct ZeroCopyStats {
    pub total_transfers: u64,
    pub zero_copy_transfers: u64,
    pub fallback_transfers: u64,
    pub zero_copy_bytes: u64,
    pub fallback_bytes: u64,
    pub total_transfer_time_us: u64,
    /// Average over all successful transfers, in bytes per second.
    pub average_bandwidth: f64,
    pub error_count: u64,
}

impl ZeroCopyStats {
    /// Fraction of transfers that completed via a zero-copy path.
    pub fn zero_copy_success_rate(&self) -> f64 {
        if self.total_transfers == 0 {
            0.0
        } else {
            (self.zero_copy_transfers as f64) / (self.total_transfers as f64)
        }
    }

    /// Observed average bandwidth as a fraction of the theoretical peak.
    pub fn bandwidth_efficiency(&self, theoretical_bandwidth: u64) -> f64 {
        if theoretical_bandwidth == 0 {
            0.0
        } else {
            self.average_bandwidth / (theoretical_bandwidth as f64)
        }
    }

    pub fn error_rate(&self) -> f64 {
        if self.total_transfers == 0 {
            0.0
        } else {
            (self.error_count as f64) / (self.total_transfers as f64)
        }
    }

    /// Records one transfer. Errors count toward `total_transfers` and
    /// `error_count` but contribute no bytes or time.
    pub fn update_transfer(
        &mut self,
        bytes: u64,
        time_us: u64,
        was_zero_copy: bool,
        was_error: bool,
    ) {
        self.total_transfers += 1;

        if was_error {
            self.error_count += 1;
            return;
        }

        if was_zero_copy {
            self.zero_copy_transfers += 1;
            self.zero_copy_bytes += bytes;
        } else {
            self.fallback_transfers += 1;
            self.fallback_bytes += bytes;
        }

        self.total_transfer_time_us += time_us;

        let total_bytes = self.zero_copy_bytes + self.fallback_bytes;
        if self.total_transfer_time_us > 0 {
            self.average_bandwidth =
                (total_bytes as f64) / (self.total_transfer_time_us as f64 / 1_000_000.0);
        }
    }
}
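
// Illustrative sketch with hypothetical numbers: after one zero-copy transfer
// of 1 MiB in 100 µs, the running average bandwidth is bytes / seconds =
// 1_048_576 / 0.0001, roughly 10.49 GB/s.
fn example_stats_bandwidth() -> f64 {
    let mut stats = ZeroCopyStats::default();
    stats.update_transfer(1_048_576, 100, true, false);
    stats.average_bandwidth
}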

/// Coordinates zero-copy transfers across registered devices, tracking
/// per-device capabilities and aggregate statistics.
pub struct ZeroCopyManager {
    capabilities: Arc<RwLock<HashMap<String, ZeroCopyCapabilities>>>,
    stats: Arc<RwLock<ZeroCopyStats>>,
    memory_managers: HashMap<String, Arc<dyn MemoryManager>>,
    #[cfg(feature = "cuda")]
    cuda_devices: HashMap<String, Arc<SciRs2CudaDevice>>,
    #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
    metal_devices: HashMap<String, Arc<SciRs2MetalDevice>>,
}

impl ZeroCopyManager {
    pub fn new() -> Self {
        Self {
            capabilities: Arc::new(RwLock::new(HashMap::new())),
            stats: Arc::new(RwLock::new(ZeroCopyStats::default())),
            memory_managers: HashMap::new(),
            #[cfg(feature = "cuda")]
            cuda_devices: HashMap::new(),
            #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
            metal_devices: HashMap::new(),
        }
    }

    /// Registers a device's capabilities and memory manager under the key
    /// `"{device_type}:{id}"`.
    pub fn register_device(
        &mut self,
        device: &Device,
        capabilities: ZeroCopyCapabilities,
        memory_manager: Arc<dyn MemoryManager>,
    ) -> BackendResult<()> {
        let device_key = format!("{}:{}", device.device_type(), device.id());

        {
            let mut caps = self
                .capabilities
                .write()
                .expect("lock should not be poisoned");
            caps.insert(device_key.clone(), capabilities);
        }

        self.memory_managers.insert(device_key, memory_manager);
        Ok(())
    }

    #[cfg(feature = "cuda")]
    pub fn register_cuda_device(
        &mut self,
        device: &Device,
        scirs2_device: Arc<SciRs2CudaDevice>,
        capabilities: ZeroCopyCapabilities,
        memory_manager: Arc<dyn MemoryManager>,
    ) -> BackendResult<()> {
        let device_key = format!("{}:{}", device.device_type(), device.id());

        self.register_device(device, capabilities, memory_manager)?;

        self.cuda_devices.insert(device_key, scirs2_device);
        Ok(())
    }

    #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
    pub fn register_metal_device(
        &mut self,
        device: &Device,
        scirs2_device: Arc<SciRs2MetalDevice>,
        capabilities: ZeroCopyCapabilities,
        memory_manager: Arc<dyn MemoryManager>,
    ) -> BackendResult<()> {
        let device_key = format!("{}:{}", device.device_type(), device.id());

        self.register_device(device, capabilities, memory_manager)?;

        self.metal_devices.insert(device_key, scirs2_device);
        Ok(())
    }

    pub fn get_capabilities(&self, device: &Device) -> Option<ZeroCopyCapabilities> {
        let device_key = format!("{}:{}", device.device_type(), device.id());
        let caps = self
            .capabilities
            .read()
            .expect("lock should not be poisoned");
        caps.get(&device_key).copied()
    }

    /// Whether a zero-copy path exists between two registered devices.
    pub fn can_zero_copy(&self, source: &Device, destination: &Device) -> bool {
        let source_caps = self.get_capabilities(source);
        let dest_caps = self.get_capabilities(destination);

        match (source_caps, dest_caps) {
            (Some(src), Some(dst)) => {
                if source.id() == destination.id() {
                    src.memory_mapping && dst.memory_mapping
                } else if source.device_type() == DeviceType::Cpu {
                    dst.unified_memory || dst.pinned_memory
                } else if destination.device_type() == DeviceType::Cpu {
                    src.unified_memory || src.pinned_memory
                } else {
                    src.peer_to_peer && dst.peer_to_peer
                }
            }
            _ => false,
        }
    }

    /// Executes a transfer, preferring a zero-copy path and falling back to a
    /// plain copy when none is available. Returns `Ok(true)` if the transfer
    /// was zero-copy and `Ok(false)` if it fell back.
    pub async fn transfer(&mut self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        let start_time = std::time::Instant::now();

        if !self.can_zero_copy(&transfer.source_device, &transfer.destination_device) {
            return self.fallback_transfer(transfer, start_time).await;
        }

        let result = match transfer.direction {
            TransferDirection::HostToDevice => self.host_to_device_transfer(transfer).await,
            TransferDirection::DeviceToHost => self.device_to_host_transfer(transfer).await,
            TransferDirection::DeviceToDevice => self.device_to_device_transfer(transfer).await,
            TransferDirection::CrossDevice => self.cross_device_transfer(transfer).await,
        };

        let elapsed_us = start_time.elapsed().as_micros() as u64;
        let was_zero_copy = matches!(&result, Ok(true));
        let was_error = result.is_err();

        {
            let mut stats = self.stats.write().expect("lock should not be poisoned");
            stats.update_transfer(transfer.size as u64, elapsed_us, was_zero_copy, was_error);
        }

        result
    }

    async fn host_to_device_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        let dest_caps = self
            .get_capabilities(&transfer.destination_device)
            .ok_or_else(|| {
                BackendError::BackendError("Destination device not registered".to_string())
            })?;

        if dest_caps.unified_memory {
            self.unified_memory_transfer(transfer).await
        } else if dest_caps.pinned_memory {
            self.pinned_memory_transfer(transfer).await
        } else {
            Err(BackendError::BackendError(
                "No zero-copy method available for host to device transfer".to_string(),
            ))
        }
    }

    async fn device_to_host_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        let source_caps = self
            .get_capabilities(&transfer.source_device)
            .ok_or_else(|| {
                BackendError::BackendError("Source device not registered".to_string())
            })?;

        if source_caps.unified_memory {
            self.unified_memory_transfer(transfer).await
        } else if source_caps.pinned_memory {
            self.pinned_memory_transfer(transfer).await
        } else {
            Err(BackendError::BackendError(
                "No zero-copy method available for device to host transfer".to_string(),
            ))
        }
    }

    async fn device_to_device_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        let device_caps = self
            .get_capabilities(&transfer.source_device)
            .ok_or_else(|| BackendError::BackendError("Device not registered".to_string()))?;

        if device_caps.memory_mapping {
            self.memory_mapped_transfer(transfer).await
        } else {
            Err(BackendError::BackendError(
                "No zero-copy method available for device to device transfer".to_string(),
            ))
        }
    }

    #[allow(unused_unsafe)]
    async fn cross_device_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        let source_caps = self
            .get_capabilities(&transfer.source_device)
            .ok_or_else(|| {
                BackendError::BackendError("Source device not registered".to_string())
            })?;
        let dest_caps = self
            .get_capabilities(&transfer.destination_device)
            .ok_or_else(|| {
                BackendError::BackendError("Destination device not registered".to_string())
            })?;

        if source_caps.peer_to_peer && dest_caps.peer_to_peer {
            self.peer_to_peer_transfer(transfer).await
        } else {
            Err(BackendError::BackendError(
                "No zero-copy method available for cross-device transfer".to_string(),
            ))
        }
    }

    async fn unified_memory_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        let device_key = format!(
            "{}:{}",
            transfer.destination_device.device_type(),
            transfer.destination_device.id()
        );

        match transfer.destination_device.device_type() {
            #[cfg(feature = "cuda")]
            DeviceType::Cuda(_) => {
                if self.cuda_devices.get(&device_key).is_some() {
                    Err(BackendError::BackendError(
                        "CUDA unified memory prefetch not yet implemented - requires scirs2_cuda"
                            .to_string(),
                    ))
                } else {
                    Err(BackendError::BackendError(
                        "CUDA device not registered for unified memory".to_string(),
                    ))
                }
            }

            #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
            DeviceType::Metal(_) => {
                if let Some(metal_device) = self.metal_devices.get(&device_key) {
                    // Hint that the CPU side should use write-combined caching
                    // for pages shared with the GPU.
                    #[allow(unused_unsafe)]
                    unsafe {
                        scirs2_metal::memory::set_cpu_cache_mode(
                            metal_device,
                            transfer.source_ptr,
                            scirs2_metal::memory::CpuCacheMode::WriteCombined,
                        )
                        .map_err(|e| {
                            BackendError::BackendError(format!("Metal cache mode failed: {}", e))
                        })?;
                    }
                    Ok(true)
                } else {
                    Err(BackendError::BackendError(
                        "Metal device not registered for unified memory".to_string(),
                    ))
                }
            }

            _ => {
                if let Some(memory_manager) = self.memory_managers.get(&device_key) {
                    // Best-effort hints; failures here do not fail the transfer.
                    let _ = memory_manager.set_memory_advice(
                        transfer.source_ptr,
                        transfer.size,
                        crate::memory::MemoryAdvice::SetPreferredLocation,
                    );
                    let _ = memory_manager.prefetch_to_device(transfer.source_ptr, transfer.size);
                    Ok(true)
                } else {
                    Err(BackendError::BackendError(
                        "Memory manager not found for device".to_string(),
                    ))
                }
            }
        }
    }

    async fn pinned_memory_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        #[allow(unused_variables)]
        let device_key = format!(
            "{}:{}",
            transfer.destination_device.device_type(),
            transfer.destination_device.id()
        );

        match transfer.destination_device.device_type() {
            #[cfg(feature = "cuda")]
            DeviceType::Cuda(_) => {
                if let Some(cuda_device) = self.cuda_devices.get(&device_key) {
                    match transfer.mode {
                        TransferMode::Asynchronous => {
                            self.launch_cuda_async_transfer(cuda_device, transfer).await
                        }
                        TransferMode::Streaming => {
                            self.launch_cuda_streaming_transfer(cuda_device, transfer)
                                .await
                        }
                        _ => self.launch_cuda_sync_transfer(cuda_device, transfer).await,
                    }
                } else {
                    Err(BackendError::BackendError(
                        "CUDA device not registered for pinned memory".to_string(),
                    ))
                }
            }

            #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
            DeviceType::Metal(_) => {
                if let Some(metal_device) = self.metal_devices.get(&device_key) {
                    match transfer.mode {
                        TransferMode::Asynchronous => {
                            self.launch_metal_async_transfer(metal_device, transfer)
                                .await
                        }
                        TransferMode::Streaming => {
                            self.launch_metal_streaming_transfer(metal_device, transfer)
                                .await
                        }
                        _ => {
                            self.launch_metal_sync_transfer(metal_device, transfer)
                                .await
                        }
                    }
                } else {
                    Err(BackendError::BackendError(
                        "Metal device not registered for pinned memory".to_string(),
                    ))
                }
            }

            _ => match transfer.mode {
                TransferMode::Asynchronous => self.launch_async_dma(transfer).await,
                TransferMode::Streaming => self.launch_streaming_transfer(transfer).await,
                _ => self.launch_sync_dma(transfer).await,
            },
        }
    }

    async fn memory_mapped_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        if transfer.source_ptr.is_null() || transfer.destination_ptr.is_null() {
            return Err(BackendError::InvalidArgument(
                "Null pointer in memory mapped transfer".to_string(),
            ));
        }

        self.launch_device_copy(transfer).await
    }

    async fn peer_to_peer_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        // The caller has already verified both devices are registered.
        let source_caps = self
            .get_capabilities(&transfer.source_device)
            .expect("source device capabilities should exist");
        let dest_caps = self
            .get_capabilities(&transfer.destination_device)
            .expect("destination device capabilities should exist");

        if !source_caps.peer_to_peer || !dest_caps.peer_to_peer {
            return Err(BackendError::BackendError(
                "Peer-to-peer not supported on one or both devices".to_string(),
            ));
        }

        self.launch_p2p_transfer(transfer).await
    }

    /// Plain copy fallback used when no zero-copy path exists. Records the
    /// transfer in the stats and returns `Ok(false)` to signal the fallback.
    async fn fallback_transfer(
        &mut self,
        transfer: &ZeroCopyTransfer,
        start_time: std::time::Instant,
    ) -> BackendResult<bool> {
        if transfer.source_ptr.is_null() || transfer.destination_ptr.is_null() {
            return Err(BackendError::InvalidArgument(
                "Null pointer in fallback transfer".to_string(),
            ));
        }

        // SAFETY: pointers were checked for null above; the caller guarantees
        // both regions are valid for `size` bytes and do not overlap.
        unsafe {
            std::ptr::copy_nonoverlapping(
                transfer.source_ptr,
                transfer.destination_ptr,
                transfer.size,
            );
        }

        let elapsed_us = start_time.elapsed().as_micros() as u64;

        {
            let mut stats = self.stats.write().expect("lock should not be poisoned");
            stats.update_transfer(transfer.size as u64, elapsed_us, false, false);
        }

        Ok(false)
    }

    async fn launch_async_dma(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        // Yield so other tasks can interleave with the copy.
        #[cfg(feature = "async")]
        tokio::task::yield_now().await;

        if !transfer.source_ptr.is_null() && !transfer.destination_ptr.is_null() {
            // SAFETY: pointers are non-null; the caller guarantees both regions
            // are valid for `size` bytes and do not overlap.
            unsafe {
                std::ptr::copy_nonoverlapping(
                    transfer.source_ptr,
                    transfer.destination_ptr,
                    transfer.size,
                );
            }
        }

        Ok(true)
    }

    async fn launch_streaming_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        const CHUNK_SIZE: usize = 64 * 1024 * 1024; // 64 MiB per chunk
        let num_chunks = transfer.size.div_ceil(CHUNK_SIZE);

        for chunk in 0..num_chunks {
            let chunk_offset = chunk * CHUNK_SIZE;
            let chunk_size = std::cmp::min(CHUNK_SIZE, transfer.size - chunk_offset);

            if chunk_size == 0 {
                break;
            }

            let chunk_transfer = ZeroCopyTransfer {
                source_ptr: unsafe { transfer.source_ptr.add(chunk_offset) },
                destination_ptr: unsafe { transfer.destination_ptr.add(chunk_offset) },
                size: chunk_size,
                mode: TransferMode::Asynchronous,
                ..transfer.clone()
            };

            self.launch_async_dma(&chunk_transfer).await?;

            #[cfg(feature = "async")]
            tokio::task::yield_now().await;
        }

        Ok(true)
    }

    async fn launch_sync_dma(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        if !transfer.source_ptr.is_null() && !transfer.destination_ptr.is_null() {
            // SAFETY: pointers are non-null; the caller guarantees both regions
            // are valid for `size` bytes and do not overlap.
            unsafe {
                std::ptr::copy_nonoverlapping(
                    transfer.source_ptr,
                    transfer.destination_ptr,
                    transfer.size,
                );
            }
            Ok(true)
        } else {
            Err(BackendError::InvalidArgument(
                "Null pointer in sync DMA transfer".to_string(),
            ))
        }
    }

    async fn launch_device_copy(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        if !transfer.source_ptr.is_null() && !transfer.destination_ptr.is_null() {
            // SAFETY: as above; both regions valid for `size` bytes, non-overlapping.
            unsafe {
                std::ptr::copy_nonoverlapping(
                    transfer.source_ptr,
                    transfer.destination_ptr,
                    transfer.size,
                );
            }
            Ok(true)
        } else {
            Err(BackendError::InvalidArgument(
                "Null pointer in device copy".to_string(),
            ))
        }
    }

    #[allow(unused_unsafe)]
    async fn launch_p2p_transfer(&self, transfer: &ZeroCopyTransfer) -> BackendResult<bool> {
        #[allow(unused_variables)]
        let source_key = format!(
            "{}:{}",
            transfer.source_device.device_type(),
            transfer.source_device.id()
        );
        #[allow(unused_variables)]
        let dest_key = format!(
            "{}:{}",
            transfer.destination_device.device_type(),
            transfer.destination_device.id()
        );

        match (
            transfer.source_device.device_type(),
            transfer.destination_device.device_type(),
        ) {
            #[cfg(feature = "cuda")]
            (DeviceType::Cuda(_), DeviceType::Cuda(_)) => {
                if self.cuda_devices.get(&source_key).is_some()
                    && self.cuda_devices.get(&dest_key).is_some()
                {
                    Err(BackendError::BackendError(
                        "CUDA P2P transfer not yet implemented - requires scirs2_cuda".to_string(),
                    ))
                } else {
                    Err(BackendError::BackendError(
                        "CUDA devices not registered for P2P transfer".to_string(),
                    ))
                }
            }

            _ => {
                if !transfer.source_ptr.is_null() && !transfer.destination_ptr.is_null() {
                    // SAFETY: pointers are non-null; the caller guarantees both
                    // regions are valid for `size` bytes and do not overlap.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            transfer.source_ptr,
                            transfer.destination_ptr,
                            transfer.size,
                        );
                    }
                    Ok(true)
                } else {
                    Err(BackendError::InvalidArgument(
                        "Null pointer in P2P transfer".to_string(),
                    ))
                }
            }
        }
    }

    #[cfg(feature = "cuda")]
    #[allow(unused_unsafe)]
    async fn launch_cuda_async_transfer(
        &self,
        _cuda_device: &SciRs2CudaDevice,
        _transfer: &ZeroCopyTransfer,
    ) -> BackendResult<bool> {
        Err(BackendError::BackendError(
            "CUDA async transfer not yet implemented - requires scirs2_cuda".to_string(),
        ))
    }

    #[cfg(feature = "cuda")]
    async fn launch_cuda_sync_transfer(
        &self,
        _cuda_device: &SciRs2CudaDevice,
        _transfer: &ZeroCopyTransfer,
    ) -> BackendResult<bool> {
        Err(BackendError::BackendError(
            "CUDA sync transfer not yet implemented - requires scirs2_cuda".to_string(),
        ))
    }

    #[cfg(feature = "cuda")]
    async fn launch_cuda_streaming_transfer(
        &self,
        cuda_device: &SciRs2CudaDevice,
        transfer: &ZeroCopyTransfer,
    ) -> BackendResult<bool> {
        const CHUNK_SIZE: usize = 64 * 1024 * 1024; // 64 MiB per chunk
        let num_chunks = transfer.size.div_ceil(CHUNK_SIZE);

        for chunk in 0..num_chunks {
            let chunk_offset = chunk * CHUNK_SIZE;
            let chunk_size = std::cmp::min(CHUNK_SIZE, transfer.size - chunk_offset);

            if chunk_size == 0 {
                break;
            }

            let chunk_transfer = ZeroCopyTransfer {
                source_ptr: unsafe { transfer.source_ptr.add(chunk_offset) },
                destination_ptr: unsafe { transfer.destination_ptr.add(chunk_offset) },
                size: chunk_size,
                ..transfer.clone()
            };

            self.launch_cuda_async_transfer(cuda_device, &chunk_transfer)
                .await?;

            #[cfg(feature = "async")]
            tokio::task::yield_now().await;
        }

        scirs2_cuda::synchronize(cuda_device).map_err(|e| {
            BackendError::BackendError(format!("CUDA streaming sync failed: {}", e))
        })?;

        Ok(true)
    }

    #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
    async fn launch_metal_async_transfer(
        &self,
        metal_device: &SciRs2MetalDevice,
        transfer: &ZeroCopyTransfer,
    ) -> BackendResult<bool> {
        #[allow(unused_unsafe)]
        unsafe {
            match transfer.direction {
                TransferDirection::HostToDevice => {
                    scirs2_metal::memory::copy_host_to_device_async(
                        metal_device,
                        transfer.source_ptr,
                        transfer.destination_ptr,
                        transfer.size,
                    )
                    .await
                    .map_err(|e| {
                        BackendError::BackendError(format!(
                            "Metal H2D async transfer failed: {}",
                            e
                        ))
                    })?;
                }
                TransferDirection::DeviceToHost => {
                    scirs2_metal::memory::copy_device_to_host_async(
                        metal_device,
                        transfer.source_ptr,
                        transfer.destination_ptr,
                        transfer.size,
                    )
                    .await
                    .map_err(|e| {
                        BackendError::BackendError(format!(
                            "Metal D2H async transfer failed: {}",
                            e
                        ))
                    })?;
                }
                _ => {
                    return Err(BackendError::InvalidArgument(
                        "Invalid transfer direction for Metal async transfer".to_string(),
                    ));
                }
            }
        }
        Ok(true)
    }

    #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
    async fn launch_metal_sync_transfer(
        &self,
        metal_device: &SciRs2MetalDevice,
        transfer: &ZeroCopyTransfer,
    ) -> BackendResult<bool> {
        #[allow(unused_unsafe)]
        unsafe {
            match transfer.direction {
                TransferDirection::HostToDevice => {
                    scirs2_metal::memory::copy_host_to_device(
                        metal_device,
                        transfer.source_ptr,
                        transfer.destination_ptr,
                        transfer.size,
                    )
                    .map_err(|e| {
                        BackendError::BackendError(format!("Metal H2D sync transfer failed: {}", e))
                    })?;
                }
                TransferDirection::DeviceToHost => {
                    scirs2_metal::memory::copy_device_to_host(
                        metal_device,
                        transfer.source_ptr,
                        transfer.destination_ptr,
                        transfer.size,
                    )
                    .map_err(|e| {
                        BackendError::BackendError(format!("Metal D2H sync transfer failed: {}", e))
                    })?;
                }
                _ => {
                    return Err(BackendError::InvalidArgument(
                        "Invalid transfer direction for Metal sync transfer".to_string(),
                    ));
                }
            }
        }
        Ok(true)
    }

    #[cfg(all(feature = "metal", target_os = "macos", target_arch = "aarch64"))]
    async fn launch_metal_streaming_transfer(
        &self,
        metal_device: &SciRs2MetalDevice,
        transfer: &ZeroCopyTransfer,
    ) -> BackendResult<bool> {
        const CHUNK_SIZE: usize = 32 * 1024 * 1024; // 32 MiB per chunk
        let num_chunks = transfer.size.div_ceil(CHUNK_SIZE);

        for chunk in 0..num_chunks {
            let chunk_offset = chunk * CHUNK_SIZE;
            let chunk_size = std::cmp::min(CHUNK_SIZE, transfer.size - chunk_offset);

            if chunk_size == 0 {
                break;
            }

            let chunk_transfer = ZeroCopyTransfer {
                source_ptr: unsafe { transfer.source_ptr.add(chunk_offset) },
                destination_ptr: unsafe { transfer.destination_ptr.add(chunk_offset) },
                size: chunk_size,
                ..transfer.clone()
            };

            self.launch_metal_async_transfer(metal_device, &chunk_transfer)
                .await?;

            #[cfg(feature = "async")]
            tokio::task::yield_now().await;
        }

        scirs2_metal::synchronize(metal_device).map_err(|e| {
            BackendError::BackendError(format!("Metal streaming sync failed: {}", e))
        })?;

        Ok(true)
    }

    /// Returns a snapshot of the accumulated transfer statistics.
    pub fn get_stats(&self) -> ZeroCopyStats {
        self.stats
            .read()
            .expect("lock should not be poisoned")
            .clone()
    }

    pub fn reset_stats(&self) {
        let mut stats = self.stats.write().expect("lock should not be poisoned");
        *stats = ZeroCopyStats::default();
    }

    /// Picks the best mode both endpoints support: streaming for transfers
    /// over 100 MiB, then asynchronous, then peer-to-peer for cross-device
    /// transfers, falling back to synchronous.
    pub fn get_optimal_transfer_mode(&self, transfer: &ZeroCopyTransfer) -> TransferMode {
        let source_caps = self.get_capabilities(&transfer.source_device);
        let dest_caps = self.get_capabilities(&transfer.destination_device);

        match (source_caps, dest_caps) {
            (Some(src), Some(dst)) => {
                if transfer.size > 100 * 1024 * 1024
                    && src.streaming_transfers
                    && dst.streaming_transfers
                {
                    TransferMode::Streaming
                } else if src.async_transfers && dst.async_transfers {
                    TransferMode::Asynchronous
                } else if transfer.direction == TransferDirection::CrossDevice
                    && src.peer_to_peer
                    && dst.peer_to_peer
                {
                    TransferMode::PeerToPeer
                } else {
                    TransferMode::Synchronous
                }
            }
            _ => TransferMode::Synchronous,
        }
    }

    /// Rewrites a transfer with the optimal mode, a 256-byte alignment for
    /// transfers over 1 MiB, and an elevated priority for transfers over
    /// 100 MiB.
    pub fn optimize_transfer(&self, mut transfer: ZeroCopyTransfer) -> ZeroCopyTransfer {
        transfer.mode = self.get_optimal_transfer_mode(&transfer);

        if transfer.alignment < 256 && transfer.size > 1024 * 1024 {
            transfer.alignment = 256;
        }

        transfer.priority = if transfer.size > 100 * 1024 * 1024 {
            0 // highest priority for large transfers
        } else {
            1
        };

        transfer
    }
}

impl Default for ZeroCopyManager {
    fn default() -> Self {
        Self::new()
    }
}
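
// Illustrative sketch (hypothetical wiring, not part of the original API):
// registering a CPU device with statically detected capabilities and asking
// the manager to plan a transfer. A concrete `MemoryManager` implementation
// is assumed to be supplied by the caller; with only one endpoint registered,
// the planner conservatively returns `TransferMode::Synchronous`.
fn example_register_and_plan(
    manager: &mut ZeroCopyManager,
    cpu: Device,
    gpu: Device,
    memory_manager: Arc<dyn MemoryManager>,
) -> BackendResult<TransferMode> {
    let caps = utils::detect_capabilities(DeviceType::Cpu);
    manager.register_device(&cpu, caps, memory_manager)?;

    let transfer = ZeroCopyTransfer::new(
        cpu,
        gpu,
        std::ptr::null_mut(),
        std::ptr::null_mut(),
        4096,
    );
    Ok(manager.get_optimal_transfer_mode(&transfer))
}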

pub mod utils {
    use super::*;

    /// Static capability profiles per backend. These are optimistic defaults,
    /// not probed from hardware.
    pub fn detect_capabilities(device_type: DeviceType) -> ZeroCopyCapabilities {
        match device_type {
            DeviceType::Cuda(_) => ZeroCopyCapabilities {
                unified_memory: true,
                peer_to_peer: true,
                memory_mapping: true,
                direct_gpu_access: true,
                pinned_memory: true,
                memory_advice: true,
                async_transfers: true,
                streaming_transfers: true,
            },
            DeviceType::Metal(_) => ZeroCopyCapabilities {
                unified_memory: true,
                peer_to_peer: false, // no peer-to-peer path exposed for Metal here
                memory_mapping: true,
                direct_gpu_access: true,
                pinned_memory: true,
                memory_advice: true,
                async_transfers: true,
                streaming_transfers: true,
            },
            DeviceType::Wgpu(_) => ZeroCopyCapabilities {
                unified_memory: false,
                peer_to_peer: false,
                memory_mapping: true,
                direct_gpu_access: false,
                pinned_memory: false,
                memory_advice: false,
                async_transfers: true,
                streaming_transfers: false,
            },
            DeviceType::Cpu => ZeroCopyCapabilities {
                unified_memory: true,
                peer_to_peer: false,
                memory_mapping: true,
                direct_gpu_access: false,
                pinned_memory: true,
                memory_advice: false,
                async_transfers: false,
                streaming_transfers: false,
            },
        }
    }

    /// True if `ptr` is aligned to `alignment`, which must be a nonzero power
    /// of two.
    pub fn check_alignment(ptr: *const u8, alignment: usize) -> bool {
        if !alignment.is_power_of_two() {
            return false;
        }
        (ptr as usize).is_multiple_of(alignment)
    }

    /// Chunk size for streaming transfers: a per-backend cap, shrunk so large
    /// transfers split into roughly eight or more chunks.
    pub fn optimal_chunk_size(total_size: usize, device_type: DeviceType) -> usize {
        let base_chunk_size = match device_type {
            DeviceType::Cuda(_) => 64 * 1024 * 1024,
            DeviceType::Metal(_) => 32 * 1024 * 1024,
            DeviceType::Wgpu(_) => 16 * 1024 * 1024,
            DeviceType::Cpu => 128 * 1024 * 1024,
        };

        if total_size < base_chunk_size {
            total_size
        } else {
            std::cmp::min(base_chunk_size, total_size / 8)
        }
    }

    /// Rough efficiency score in `[0.0, 1.0]`: zero when zero-copy is
    /// impossible, with penalties for poor alignment and tiny transfers, and
    /// bonuses for well-matched modes, clamped to 1.0.
    pub fn estimate_efficiency(
        transfer: &ZeroCopyTransfer,
        capabilities: &ZeroCopyCapabilities,
    ) -> f32 {
        if !transfer.is_zero_copy_possible(capabilities) {
            return 0.0;
        }

        let mut efficiency: f32 = 1.0;

        if transfer.alignment < 256 {
            efficiency *= 0.8;
        }

        if transfer.size < 4096 {
            efficiency *= 0.5;
        }

        match transfer.mode {
            TransferMode::Streaming if transfer.size > 100 * 1024 * 1024 => efficiency *= 1.2,
            TransferMode::PeerToPeer if transfer.direction == TransferDirection::CrossDevice => {
                efficiency *= 1.3
            }
            TransferMode::Asynchronous => efficiency *= 1.1,
            _ => {}
        }

        efficiency.min(1.0)
    }
}
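
// Illustrative sketch: chunk planning for a hypothetical 1 GiB CUDA transfer.
// The CUDA cap is 64 MiB and 1 GiB / 8 = 128 MiB exceeds it, so the cap wins
// and the transfer streams as ceil(1 GiB / 64 MiB) = 16 chunks.
fn example_chunk_plan() -> (usize, usize) {
    let total: usize = 1024 * 1024 * 1024;
    let chunk = utils::optimal_chunk_size(total, DeviceType::Cuda(0));
    let num_chunks = total.div_ceil(chunk);
    (chunk, num_chunks)
}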

#[cfg(test)]
mod tests {
    use super::*;
    use crate::device::{Device, DeviceInfo};
    use std::ptr::null_mut;

    fn create_test_device(device_type: DeviceType, id: usize) -> Device {
        let info = DeviceInfo::default();
        Device::new(
            id,
            device_type,
            format!("Test {:?} {}", device_type, id),
            info,
        )
    }

    #[test]
    fn test_zero_copy_capabilities_default() {
        let caps = ZeroCopyCapabilities::default();
        assert!(!caps.has_any_capabilities());
        assert_eq!(caps.capability_score(), 0.0);
        assert_eq!(caps.recommended_transfer_mode(), TransferMode::Synchronous);
    }

    #[test]
    fn test_zero_copy_capabilities_scoring() {
        let mut caps = ZeroCopyCapabilities::default();
        caps.unified_memory = true;
        caps.async_transfers = true;

        assert!(caps.has_any_capabilities());
        assert_eq!(caps.capability_score(), 0.25); // 2 of 8 flags set
        assert_eq!(caps.recommended_transfer_mode(), TransferMode::Asynchronous);
    }

    #[test]
    fn test_zero_copy_transfer_creation() {
        let cpu_device = create_test_device(DeviceType::Cpu, 0);
        let gpu_device = create_test_device(DeviceType::Cuda(1), 1);

        let transfer = ZeroCopyTransfer::new(cpu_device, gpu_device, null_mut(), null_mut(), 1024);

        assert_eq!(transfer.direction, TransferDirection::HostToDevice);
        assert_eq!(transfer.mode, TransferMode::Synchronous);
        assert_eq!(transfer.size, 1024);
        assert_eq!(transfer.alignment, 1);
        assert_eq!(transfer.priority, 1);
        assert!(transfer.stream_id.is_none());
    }

    #[test]
    fn test_zero_copy_transfer_direction_detection() {
        let cpu_device = create_test_device(DeviceType::Cpu, 0);
        let gpu_device1 = create_test_device(DeviceType::Cuda(1), 1);
        let gpu_device2 = create_test_device(DeviceType::Cuda(2), 2);

        let transfer = ZeroCopyTransfer::new(
            cpu_device.clone(),
            gpu_device1.clone(),
            null_mut(),
            null_mut(),
            1024,
        );
        assert_eq!(transfer.direction, TransferDirection::HostToDevice);

        let transfer = ZeroCopyTransfer::new(
            gpu_device1.clone(),
            cpu_device,
            null_mut(),
            null_mut(),
            1024,
        );
        assert_eq!(transfer.direction, TransferDirection::DeviceToHost);

        let transfer = ZeroCopyTransfer::new(
            gpu_device1.clone(),
            gpu_device1.clone(),
            null_mut(),
            null_mut(),
            1024,
        );
        assert_eq!(transfer.direction, TransferDirection::DeviceToDevice);

        let transfer = ZeroCopyTransfer::new(
            gpu_device1.clone(),
            gpu_device2,
            null_mut(),
            null_mut(),
            1024,
        );
        assert_eq!(transfer.direction, TransferDirection::CrossDevice);
    }

    #[test]
    fn test_zero_copy_transfer_builder() {
        let cpu_device = create_test_device(DeviceType::Cpu, 0);
        let gpu_device = create_test_device(DeviceType::Cuda(1), 1);

        let transfer = ZeroCopyTransfer::new(cpu_device, gpu_device, null_mut(), null_mut(), 1024)
            .with_mode(TransferMode::Asynchronous)
            .with_alignment(256)
            .with_priority(0)
            .with_stream(42);

        assert_eq!(transfer.mode, TransferMode::Asynchronous);
        assert_eq!(transfer.alignment, 256);
        assert_eq!(transfer.priority, 0);
        assert_eq!(transfer.stream_id, Some(42));
    }

    #[test]
    fn test_zero_copy_transfer_zero_copy_possible() {
        let cpu_device = create_test_device(DeviceType::Cpu, 0);
        let gpu_device = create_test_device(DeviceType::Cuda(1), 1);

        let transfer = ZeroCopyTransfer::new(cpu_device, gpu_device, null_mut(), null_mut(), 1024);

        let caps_unified = ZeroCopyCapabilities {
            unified_memory: true,
            ..Default::default()
        };

        let caps_pinned = ZeroCopyCapabilities {
            pinned_memory: true,
            ..Default::default()
        };

        let caps_none = ZeroCopyCapabilities::default();

        assert!(transfer.is_zero_copy_possible(&caps_unified));
        assert!(transfer.is_zero_copy_possible(&caps_pinned));
        assert!(!transfer.is_zero_copy_possible(&caps_none));
    }

    #[test]
    fn test_zero_copy_transfer_bandwidth_estimation() {
        let cpu_device = create_test_device(DeviceType::Cpu, 0);
        let gpu_device = create_test_device(DeviceType::Cuda(1), 1);

        let transfer = ZeroCopyTransfer::new(cpu_device, gpu_device, null_mut(), null_mut(), 1024)
            .with_alignment(256);

        let bandwidth = transfer.estimate_bandwidth(DeviceType::Cuda(1));
        assert_eq!(bandwidth, 25_000_000_000); // aligned H2D path

        let transfer_unaligned = ZeroCopyTransfer::new(
            create_test_device(DeviceType::Cpu, 0),
            create_test_device(DeviceType::Cuda(1), 1),
            null_mut(),
            null_mut(),
            1024,
        )
        .with_alignment(1);

        let bandwidth_unaligned = transfer_unaligned.estimate_bandwidth(DeviceType::Cuda(1));
        assert_eq!(bandwidth_unaligned, 12_000_000_000); // unaligned H2D path
    }

    #[test]
    fn test_zero_copy_stats() {
        let mut stats = ZeroCopyStats::default();

        assert_eq!(stats.zero_copy_success_rate(), 0.0);
        assert_eq!(stats.error_rate(), 0.0);

        stats.update_transfer(1024, 100, true, false);
        assert_eq!(stats.total_transfers, 1);
        assert_eq!(stats.zero_copy_transfers, 1);
        assert_eq!(stats.zero_copy_success_rate(), 1.0);

        stats.update_transfer(512, 200, false, false);
        assert_eq!(stats.total_transfers, 2);
        assert_eq!(stats.fallback_transfers, 1);
        assert_eq!(stats.zero_copy_success_rate(), 0.5);

        stats.update_transfer(256, 50, false, true);
        assert_eq!(stats.total_transfers, 3);
        assert_eq!(stats.error_count, 1);
        assert!((stats.error_rate() - (1.0 / 3.0)).abs() < 0.001);
    }

    #[test]
    fn test_zero_copy_manager_creation() {
        let manager = ZeroCopyManager::new();
        assert!(manager
            .capabilities
            .read()
            .expect("lock should not be poisoned")
            .is_empty());

        let stats = manager.get_stats();
        assert_eq!(stats.total_transfers, 0);
    }
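
    #[test]
    fn test_optimal_transfer_mode_unregistered_devices() {
        // Added check (a sketch of the expected conservative behavior): with
        // no capabilities registered, the manager falls back to synchronous.
        let manager = ZeroCopyManager::new();
        let transfer = ZeroCopyTransfer::new(
            create_test_device(DeviceType::Cpu, 0),
            create_test_device(DeviceType::Cuda(1), 1),
            null_mut(),
            null_mut(),
            1024,
        );
        assert_eq!(
            manager.get_optimal_transfer_mode(&transfer),
            TransferMode::Synchronous
        );
    }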

    #[test]
    fn test_utils_detect_capabilities() {
        let cuda_caps = utils::detect_capabilities(DeviceType::Cuda(0));
        assert!(cuda_caps.unified_memory);
        assert!(cuda_caps.peer_to_peer);
        assert!(cuda_caps.streaming_transfers);

        let webgpu_caps = utils::detect_capabilities(DeviceType::Wgpu(0));
        assert!(!webgpu_caps.unified_memory);
        assert!(!webgpu_caps.peer_to_peer);
        assert!(!webgpu_caps.streaming_transfers);
    }

    #[test]
    fn test_utils_check_alignment() {
        let ptr = 0x1000 as *const u8; // 4 KiB-aligned address
        assert!(utils::check_alignment(ptr, 16));
        assert!(utils::check_alignment(ptr, 256));
        assert!(utils::check_alignment(ptr, 4096));
        assert!(!utils::check_alignment(ptr, 8192));

        assert!(!utils::check_alignment(ptr, 0)); // zero is invalid
        assert!(!utils::check_alignment(ptr, 3)); // not a power of two
    }

    #[test]
    fn test_utils_optimal_chunk_size() {
        let cuda_chunk = utils::optimal_chunk_size(1024 * 1024 * 1024, DeviceType::Cuda(0));
        assert_eq!(cuda_chunk, 64 * 1024 * 1024); // capped at the CUDA maximum

        let small_chunk = utils::optimal_chunk_size(1024, DeviceType::Cuda(0));
        assert_eq!(small_chunk, 1024); // small transfers stay whole
    }
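
    #[test]
    fn test_zero_copy_transfer_time_estimation() {
        // Added check of the derived timing formula: 1 MiB at the assumed
        // 25 GB/s aligned CUDA H2D bandwidth gives
        // (1_048_576 * 1_000_000) / 25_000_000_000 = 41 µs (integer division).
        let transfer = ZeroCopyTransfer::new(
            create_test_device(DeviceType::Cpu, 0),
            create_test_device(DeviceType::Cuda(1), 1),
            null_mut(),
            null_mut(),
            1024 * 1024,
        )
        .with_alignment(256);

        assert_eq!(transfer.estimate_transfer_time_us(DeviceType::Cuda(1)), 41);
    }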

    #[test]
    fn test_utils_estimate_efficiency() {
        let cpu_device = create_test_device(DeviceType::Cpu, 0);
        let gpu_device = create_test_device(DeviceType::Cuda(1), 1);

        let transfer =
            ZeroCopyTransfer::new(cpu_device, gpu_device, null_mut(), null_mut(), 1024 * 1024)
                .with_alignment(256)
                .with_mode(TransferMode::Asynchronous);

        let caps = ZeroCopyCapabilities {
            unified_memory: true,
            async_transfers: true,
            ..Default::default()
        };

        let efficiency = utils::estimate_efficiency(&transfer, &caps);
        assert!(efficiency > 0.0);
        assert!(efficiency <= 1.0);
    }
}