optirs_gpu/memory/vendors/
oneapi_backend.rs

1// OneAPI backend for GPU memory management
2//
3// This module provides Intel OneAPI/SYCL-specific memory management functionality,
4// including device memory allocation, SYCL queues, and performance optimization
5// features specific to Intel GPUs and accelerators.
6
7#[allow(dead_code)]
8use std::collections::HashMap;
9use std::ffi::c_void;
10use std::ptr::NonNull;
11use std::sync::{Arc, Mutex};
12use std::time::{Duration, Instant};
13
14/// OneAPI memory backend implementation
15pub struct OneApiMemoryBackend {
16    /// Backend configuration
17    config: OneApiConfig,
18    /// Device properties
19    device_properties: SyclDeviceProperties,
20    /// Active SYCL contexts
21    contexts: HashMap<u32, SyclContext>,
22    /// Memory pools
23    memory_pools: HashMap<OneApiMemoryType, OneApiMemoryPool>,
24    /// Statistics
25    stats: OneApiStats,
26    /// Queue management
27    queue_manager: SyclQueueManager,
28}
29
/// Tunable settings for [`OneApiMemoryBackend`].
#[derive(Debug, Clone)]
pub struct OneApiConfig {
    /// ID of the device the backend should use.
    pub device_id: u32,
    /// Master switch for unified shared memory (USM).
    pub enable_usm: bool,
    /// Whether device USM is enabled.
    pub enable_device_usm: bool,
    /// Whether host USM is enabled.
    pub enable_host_usm: bool,
    /// Whether shared USM is enabled.
    pub enable_shared_usm: bool,
    /// Whether allocations are served from memory pools.
    pub enable_memory_pools: bool,
    /// Whether asynchronous memory operations are enabled.
    pub enable_async_ops: bool,
    /// Growth step for memory pools, in bytes.
    pub pool_growth_size: usize,
    /// Upper bound on the number of queues.
    pub max_queues: u32,
    /// Whether profiling is enabled.
    pub enable_profiling: bool,
    /// Whether sub-groups are enabled.
    pub enable_sub_groups: bool,
}

impl Default for OneApiConfig {
    /// Returns the stock configuration: device 0, every USM flavour and pooling
    /// enabled, a 64 MiB pool growth step, 16 queues and profiling switched off.
    fn default() -> Self {
        const DEFAULT_POOL_GROWTH_BYTES: usize = 64 * 1024 * 1024; // 64MB

        Self {
            device_id: 0,
            enable_usm: true,
            enable_device_usm: true,
            enable_host_usm: true,
            enable_shared_usm: true,
            enable_memory_pools: true,
            enable_async_ops: true,
            pool_growth_size: DEFAULT_POOL_GROWTH_BYTES,
            max_queues: 16,
            enable_profiling: false,
            enable_sub_groups: true,
        }
    }
}
74
75/// SYCL device properties
76#[derive(Debug, Clone)]
77pub struct SyclDeviceProperties {
78    pub device_id: u32,
79    pub name: String,
80    pub vendor: String,
81    pub device_type: SyclDeviceType,
82    pub driver_version: String,
83    pub global_memory_size: usize,
84    pub local_memory_size: usize,
85    pub max_work_group_size: u32,
86    pub max_work_item_dimensions: u32,
87    pub max_work_item_sizes: [u32; 3],
88    pub compute_units: u32,
89    pub max_compute_units: u32,
90    pub sub_group_sizes: Vec<u32>,
91    pub preferred_sub_group_size: u32,
92    pub max_constant_buffer_size: usize,
93    pub has_fp64: bool,
94    pub has_fp16: bool,
95    pub has_atomic64: bool,
96    pub usm_device_allocations: bool,
97    pub usm_host_allocations: bool,
98    pub usm_shared_allocations: bool,
99    pub usm_system_allocations: bool,
100}
101
/// Category of a SYCL device.
#[derive(Debug, Clone, PartialEq)]
pub enum SyclDeviceType {
    /// GPU device.
    GPU,
    /// CPU device.
    CPU,
    /// Accelerator device.
    Accelerator,
    /// Host device.
    Host,
    /// Custom device.
    Custom,
}
111
/// Kind of memory an allocation request refers to; also used as the pool key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OneApiMemoryType {
    /// Device memory.
    Device,
    /// Host memory.
    Host,
    /// Shared memory.
    Shared,
    /// System memory.
    System,
    /// Buffer memory.
    Buffer,
}
121
122/// SYCL context for managing device state
123pub struct SyclContext {
124    /// Context handle (simulated)
125    pub handle: *mut c_void,
126    /// Device ID
127    pub device_id: u32,
128    /// Associated device
129    pub device_properties: SyclDeviceProperties,
130    /// Creation time
131    pub created_at: Instant,
132    /// Active queues
133    pub queues: Vec<SyclQueue>,
134    /// USM allocations
135    pub usm_allocations: HashMap<*mut c_void, UsmAllocation>,
136}
137
138/// USM (Unified Shared Memory) allocation info
139#[derive(Debug, Clone)]
140pub struct UsmAllocation {
141    pub ptr: *mut c_void,
142    pub size: usize,
143    pub usm_kind: UsmKind,
144    pub allocated_at: Instant,
145    pub device_id: u32,
146    pub alignment: usize,
147}
148
/// Flavour of a USM allocation.
#[derive(Debug, Clone, PartialEq)]
pub enum UsmKind {
    /// Device-only memory.
    Device,
    /// Host-accessible memory.
    Host,
    /// Automatically migrating memory.
    Shared,
    /// System allocator memory.
    System,
}
157
158/// SYCL queue for asynchronous operations
159pub struct SyclQueue {
160    /// Queue handle (simulated)
161    pub handle: *mut c_void,
162    /// Queue ID
163    pub id: u32,
164    /// Queue properties
165    pub properties: SyclQueueProperties,
166    /// Creation time
167    pub created_at: Instant,
168    /// Operations queue
169    pub operations: std::collections::VecDeque<SyclOperation>,
170    /// Associated context
171    pub context_id: Option<u32>,
172}
173
174/// SYCL queue properties
175#[derive(Debug, Clone)]
176pub struct SyclQueueProperties {
177    pub in_order: bool,
178    pub enable_profiling: bool,
179    pub priority: SyclQueuePriority,
180}
181
182impl Default for SyclQueueProperties {
183    fn default() -> Self {
184        Self {
185            in_order: false,
186            enable_profiling: false,
187            priority: SyclQueuePriority::Normal,
188        }
189    }
190}
191
/// Priority level assigned to a SYCL queue.
#[derive(Debug, Clone, PartialEq)]
pub enum SyclQueuePriority {
    /// Low priority.
    Low,
    /// Normal priority.
    Normal,
    /// High priority.
    High,
}
199
200/// SYCL asynchronous operation
201#[derive(Debug, Clone)]
202pub struct SyclOperation {
203    pub op_type: SyclOperationType,
204    pub src_ptr: Option<*mut c_void>,
205    pub dst_ptr: Option<*mut c_void>,
206    pub size: usize,
207    pub timestamp: Instant,
208    pub event_handle: Option<*mut c_void>,
209}
210
/// Kinds of operation that can be submitted to a SYCL queue.
#[derive(Debug, Clone)]
pub enum SyclOperationType {
    /// Memory copy from host to device.
    MemcpyHostToDevice,
    /// Memory copy from device to host.
    MemcpyDeviceToHost,
    /// Memory copy from device to device.
    MemcpyDeviceToDevice,
    /// USM memory copy.
    UsmMemcpy,
    /// USM memory set.
    UsmMemset,
    /// Kernel submission.
    KernelSubmit,
    /// Barrier wait.
    BarrierWait,
    /// Fill operation.
    Fill,
}
223
224/// OneAPI memory pool
225pub struct OneApiMemoryPool {
226    /// Memory type
227    memory_type: OneApiMemoryType,
228    /// Pool handle (simulated)
229    handle: *mut c_void,
230    /// Current size
231    current_size: usize,
232    /// Maximum size
233    max_size: usize,
234    /// Used size
235    used_size: usize,
236    /// Free blocks
237    free_blocks: std::collections::VecDeque<OneApiMemoryBlock>,
238    /// Allocated blocks
239    allocated_blocks: HashMap<*mut c_void, OneApiMemoryBlock>,
240    /// USM properties
241    usm_properties: UsmProperties,
242}
243
244/// OneAPI memory block
245#[derive(Debug, Clone)]
246pub struct OneApiMemoryBlock {
247    pub ptr: *mut c_void,
248    pub size: usize,
249    pub memory_type: OneApiMemoryType,
250    pub allocated_at: Instant,
251    pub last_access: Option<Instant>,
252    pub ref_count: u32,
253    pub usm_kind: Option<UsmKind>,
254    pub device_accessible: bool,
255    pub host_accessible: bool,
256}
257
/// Access and alignment characteristics of USM memory.
#[derive(Debug, Clone)]
pub struct UsmProperties {
    /// Alignment of allocations, in bytes.
    pub alignment: usize,
    /// Whether the device may only read the memory.
    pub device_read_only: bool,
    /// Whether the device can access the memory.
    pub device_access: bool,
    /// Whether the host can access the memory.
    pub host_access: bool,
    /// Whether atomics are supported on the memory.
    pub supports_atomics: bool,
}

impl Default for UsmProperties {
    /// Returns 64-byte aligned, device-accessible, host-inaccessible, writable
    /// memory with atomics support.
    fn default() -> Self {
        // 64 bytes is a common alignment for Intel GPUs.
        let alignment = 64;
        Self {
            alignment,
            device_read_only: false,
            device_access: true,
            host_access: false,
            supports_atomics: true,
        }
    }
}
279
280impl OneApiMemoryPool {
281    pub fn new(memory_type: OneApiMemoryType, max_size: usize) -> Self {
282        let usm_properties = match memory_type {
283            OneApiMemoryType::Device => UsmProperties {
284                alignment: 64,
285                device_read_only: false,
286                device_access: true,
287                host_access: false,
288                supports_atomics: true,
289            },
290            OneApiMemoryType::Host => UsmProperties {
291                alignment: 64,
292                device_read_only: false,
293                device_access: true,
294                host_access: true,
295                supports_atomics: false,
296            },
297            OneApiMemoryType::Shared => UsmProperties {
298                alignment: 64,
299                device_read_only: false,
300                device_access: true,
301                host_access: true,
302                supports_atomics: true,
303            },
304            _ => UsmProperties::default(),
305        };
306
307        Self {
308            memory_type,
309            handle: std::ptr::null_mut(),
310            current_size: 0,
311            max_size,
312            used_size: 0,
313            free_blocks: std::collections::VecDeque::new(),
314            allocated_blocks: HashMap::new(),
315            usm_properties,
316        }
317    }
318
319    /// Allocate from pool
320    pub fn allocate(&mut self, size: usize) -> Result<*mut c_void, OneApiError> {
321        // Try to find suitable free block
322        for i in 0..self.free_blocks.len() {
323            if self.free_blocks[i].size >= size {
324                let mut block = self.free_blocks.remove(i).unwrap();
325
326                // Split block if much larger
327                if block.size > size * 2 {
328                    let remaining_block = OneApiMemoryBlock {
329                        ptr: unsafe { block.ptr.add(size) },
330                        size: block.size - size,
331                        memory_type: block.memory_type.clone(),
332                        allocated_at: block.allocated_at,
333                        last_access: None,
334                        ref_count: 0,
335                        usm_kind: block.usm_kind.clone(),
336                        device_accessible: block.device_accessible,
337                        host_accessible: block.host_accessible,
338                    };
339                    self.free_blocks.push_back(remaining_block);
340                    block.size = size;
341                }
342
343                block.last_access = Some(Instant::now());
344                block.ref_count = 1;
345
346                let ptr = block.ptr;
347                self.allocated_blocks.insert(ptr, block);
348                self.used_size += size;
349
350                return Ok(ptr);
351            }
352        }
353
354        // Need to allocate new memory
355        if self.current_size + size > self.max_size {
356            return Err(OneApiError::OutOfMemory(
357                "Pool size limit exceeded".to_string(),
358            ));
359        }
360
361        let ptr = self.sycl_malloc(size)?;
362        let usm_kind = match self.memory_type {
363            OneApiMemoryType::Device => Some(UsmKind::Device),
364            OneApiMemoryType::Host => Some(UsmKind::Host),
365            OneApiMemoryType::Shared => Some(UsmKind::Shared),
366            OneApiMemoryType::System => Some(UsmKind::System),
367            _ => None,
368        };
369
370        let block = OneApiMemoryBlock {
371            ptr,
372            size,
373            memory_type: self.memory_type.clone(),
374            allocated_at: Instant::now(),
375            last_access: Some(Instant::now()),
376            ref_count: 1,
377            usm_kind,
378            device_accessible: self.usm_properties.device_access,
379            host_accessible: self.usm_properties.host_access,
380        };
381
382        self.allocated_blocks.insert(ptr, block);
383        self.current_size += size;
384        self.used_size += size;
385
386        Ok(ptr)
387    }
388
389    /// Free back to pool
390    pub fn free(&mut self, ptr: *mut c_void) -> Result<(), OneApiError> {
391        if let Some(block) = self.allocated_blocks.remove(&ptr) {
392            self.used_size -= block.size;
393
394            // Add to free blocks
395            self.free_blocks.push_back(OneApiMemoryBlock {
396                ptr: block.ptr,
397                size: block.size,
398                memory_type: block.memory_type,
399                allocated_at: block.allocated_at,
400                last_access: None,
401                ref_count: 0,
402                usm_kind: block.usm_kind,
403                device_accessible: block.device_accessible,
404                host_accessible: block.host_accessible,
405            });
406
407            // Try to coalesce adjacent blocks
408            self.coalesce_free_blocks();
409
410            Ok(())
411        } else {
412            Err(OneApiError::InvalidPointer(
413                "Pointer not found in pool".to_string(),
414            ))
415        }
416    }
417
418    fn coalesce_free_blocks(&mut self) {
419        // Sort free blocks by address
420        let mut blocks: Vec<OneApiMemoryBlock> = self.free_blocks.drain(..).collect();
421        blocks.sort_by_key(|block| block.ptr as usize);
422
423        let mut coalesced = Vec::new();
424        let mut current_block: Option<OneApiMemoryBlock> = None;
425
426        for block in blocks {
427            match current_block.take() {
428                None => current_block = Some(block),
429                Some(mut prev_block) => {
430                    let prev_end = prev_block.ptr as usize + prev_block.size;
431                    let block_start = block.ptr as usize;
432
433                    if prev_end == block_start && prev_block.memory_type == block.memory_type {
434                        // Coalesce blocks
435                        prev_block.size += block.size;
436                        current_block = Some(prev_block);
437                    } else {
438                        coalesced.push(prev_block);
439                        current_block = Some(block);
440                    }
441                }
442            }
443        }
444
445        if let Some(block) = current_block {
446            coalesced.push(block);
447        }
448
449        self.free_blocks = coalesced.into();
450    }
451
452    fn sycl_malloc(&self, size: usize) -> Result<*mut c_void, OneApiError> {
453        // Simulate SYCL USM allocation
454        match self.memory_type {
455            OneApiMemoryType::Device => {
456                // malloc_device equivalent
457                Ok(unsafe {
458                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
459                        size,
460                        self.usm_properties.alignment,
461                    )) as *mut c_void
462                })
463            }
464            OneApiMemoryType::Host => {
465                // malloc_host equivalent
466                Ok(unsafe {
467                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
468                        size,
469                        self.usm_properties.alignment,
470                    )) as *mut c_void
471                })
472            }
473            OneApiMemoryType::Shared => {
474                // malloc_shared equivalent
475                Ok(unsafe {
476                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
477                        size,
478                        self.usm_properties.alignment,
479                    )) as *mut c_void
480                })
481            }
482            OneApiMemoryType::System => {
483                // System malloc
484                Ok(unsafe {
485                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
486                        size,
487                        self.usm_properties.alignment,
488                    )) as *mut c_void
489                })
490            }
491            _ => Err(OneApiError::UnsupportedOperation(
492                "Unsupported memory type for allocation".to_string(),
493            )),
494        }
495    }
496}
497
498/// SYCL queue manager
499pub struct SyclQueueManager {
500    /// Available queues
501    queues: Vec<SyclQueue>,
502    /// Queue pool for reuse
503    queue_pool: std::collections::VecDeque<SyclQueue>,
504    /// Next queue ID
505    next_queue_id: u32,
506    /// Configuration
507    config: SyclQueueConfig,
508}
509
510/// Queue manager configuration
511#[derive(Debug, Clone)]
512pub struct SyclQueueConfig {
513    pub default_priority: SyclQueuePriority,
514    pub enable_priorities: bool,
515    pub max_operations_per_queue: usize,
516    pub default_in_order: bool,
517}
518
519impl Default for SyclQueueConfig {
520    fn default() -> Self {
521        Self {
522            default_priority: SyclQueuePriority::Normal,
523            enable_priorities: true,
524            max_operations_per_queue: 1000,
525            default_in_order: false,
526        }
527    }
528}
529
530impl SyclQueueManager {
531    pub fn new(config: SyclQueueConfig) -> Self {
532        Self {
533            queues: Vec::new(),
534            queue_pool: std::collections::VecDeque::new(),
535            next_queue_id: 0,
536            config,
537        }
538    }
539
540    /// Create new queue
541    pub fn create_queue(
542        &mut self,
543        properties: Option<SyclQueueProperties>,
544    ) -> Result<u32, OneApiError> {
545        let queue_id = self.next_queue_id;
546        self.next_queue_id += 1;
547
548        let queue_properties = properties.unwrap_or_else(|| SyclQueueProperties {
549            in_order: self.config.default_in_order,
550            enable_profiling: false,
551            priority: self.config.default_priority.clone(),
552        });
553
554        let queue = SyclQueue {
555            handle: std::ptr::null_mut(), // Would be actual SYCL queue
556            id: queue_id,
557            properties: queue_properties,
558            created_at: Instant::now(),
559            operations: std::collections::VecDeque::new(),
560            context_id: None,
561        };
562
563        self.queues.push(queue);
564        Ok(queue_id)
565    }
566
567    /// Destroy queue
568    pub fn destroy_queue(&mut self, queue_id: u32) -> Result<(), OneApiError> {
569        if let Some(pos) = self.queues.iter().position(|q| q.id == queue_id) {
570            let queue = self.queues.remove(pos);
571            // Clean up queue resources
572            Ok(())
573        } else {
574            Err(OneApiError::InvalidQueue("Queue not found".to_string()))
575        }
576    }
577
578    /// Submit operation to queue
579    pub fn submit_operation(
580        &mut self,
581        queue_id: u32,
582        operation: SyclOperation,
583    ) -> Result<(), OneApiError> {
584        if let Some(queue) = self.queues.iter_mut().find(|q| q.id == queue_id) {
585            if queue.operations.len() >= self.config.max_operations_per_queue {
586                return Err(OneApiError::QueueFull(
587                    "Queue operation limit reached".to_string(),
588                ));
589            }
590
591            queue.operations.push_back(operation);
592            Ok(())
593        } else {
594            Err(OneApiError::InvalidQueue("Queue not found".to_string()))
595        }
596    }
597
598    /// Wait for queue completion
599    pub fn wait_for_queue(&mut self, queue_id: u32) -> Result<(), OneApiError> {
600        // First, collect all operations from the queue
601        let mut operations = Vec::new();
602        if let Some(queue) = self.queues.iter_mut().find(|q| q.id == queue_id) {
603            while let Some(operation) = queue.operations.pop_front() {
604                operations.push(operation);
605            }
606        } else {
607            return Err(OneApiError::InvalidQueue("Queue not found".to_string()));
608        }
609
610        // Now execute all operations
611        for operation in operations {
612            self.execute_operation(operation)?;
613        }
614
615        Ok(())
616    }
617
618    fn execute_operation(&self, operation: SyclOperation) -> Result<(), OneApiError> {
619        // Simulate operation execution
620        match operation.op_type {
621            SyclOperationType::MemcpyHostToDevice => {
622                // Simulate memory copy
623                std::thread::sleep(Duration::from_micros(150));
624            }
625            SyclOperationType::MemcpyDeviceToHost => {
626                // Simulate memory copy
627                std::thread::sleep(Duration::from_micros(150));
628            }
629            SyclOperationType::MemcpyDeviceToDevice => {
630                // Simulate memory copy
631                std::thread::sleep(Duration::from_micros(80));
632            }
633            SyclOperationType::UsmMemcpy => {
634                // Simulate USM memory copy (typically faster)
635                std::thread::sleep(Duration::from_micros(20));
636            }
637            SyclOperationType::KernelSubmit => {
638                // Simulate kernel execution
639                std::thread::sleep(Duration::from_micros(500));
640            }
641            _ => {
642                // Other operations
643            }
644        }
645        Ok(())
646    }
647}
648
/// Counters describing what the OneAPI backend has done so far.
#[derive(Debug, Clone, Default)]
pub struct OneApiStats {
    /// Number of successful allocations.
    pub total_allocations: u64,
    /// Number of successful frees.
    pub total_deallocations: u64,
    /// Total bytes allocated.
    pub bytes_allocated: u64,
    /// Total bytes deallocated.
    pub bytes_deallocated: u64,
    /// Device memory in use, in bytes.
    pub device_memory_used: usize,
    /// Host memory in use, in bytes.
    pub host_memory_used: usize,
    /// Shared memory in use, in bytes.
    pub shared_memory_used: usize,
    /// Number of USM (device, host or shared) allocations.
    pub usm_allocations: u64,
    /// Number of queue operations.
    pub queue_operations: u64,
    /// Number of kernel submissions.
    pub kernel_submissions: u64,
    /// Number of memory transfers.
    pub memory_transfers: u64,
    /// Running mean of the time taken per allocation.
    pub average_allocation_time: Duration,
    /// Highest combined device, host and shared usage seen, in bytes.
    pub peak_memory_usage: usize,
}
666
667impl OneApiMemoryBackend {
668    /// Create new OneAPI backend
669    pub fn new(config: OneApiConfig) -> Result<Self, OneApiError> {
670        // Initialize OneAPI device
671        let device_properties = Self::query_device_properties(config.device_id)?;
672
673        // Create memory pools
674        let mut memory_pools = HashMap::new();
675        if config.enable_memory_pools {
676            let pool_size = device_properties.global_memory_size / 4; // Use 1/4 of total memory
677
678            if config.enable_device_usm {
679                memory_pools.insert(
680                    OneApiMemoryType::Device,
681                    OneApiMemoryPool::new(OneApiMemoryType::Device, pool_size),
682                );
683            }
684
685            if config.enable_host_usm {
686                memory_pools.insert(
687                    OneApiMemoryType::Host,
688                    OneApiMemoryPool::new(OneApiMemoryType::Host, pool_size),
689                );
690            }
691
692            if config.enable_shared_usm {
693                memory_pools.insert(
694                    OneApiMemoryType::Shared,
695                    OneApiMemoryPool::new(OneApiMemoryType::Shared, pool_size / 2),
696                );
697            }
698
699            memory_pools.insert(
700                OneApiMemoryType::System,
701                OneApiMemoryPool::new(OneApiMemoryType::System, pool_size / 4),
702            );
703        }
704
705        let queue_manager = SyclQueueManager::new(SyclQueueConfig::default());
706
707        Ok(Self {
708            config,
709            device_properties,
710            contexts: HashMap::new(),
711            memory_pools,
712            stats: OneApiStats::default(),
713            queue_manager,
714        })
715    }
716
717    /// Query device properties
718    fn query_device_properties(device_id: u32) -> Result<SyclDeviceProperties, OneApiError> {
719        // Simulate querying OneAPI/SYCL device properties
720        Ok(SyclDeviceProperties {
721            device_id,
722            name: format!("Intel GPU {}", device_id),
723            vendor: "Intel Corporation".to_string(),
724            device_type: SyclDeviceType::GPU,
725            driver_version: "1.3.0".to_string(),
726            global_memory_size: 12 * 1024 * 1024 * 1024, // 12GB
727            local_memory_size: 64 * 1024,                // 64KB
728            max_work_group_size: 1024,
729            max_work_item_dimensions: 3,
730            max_work_item_sizes: [1024, 1024, 1024],
731            compute_units: 96,
732            max_compute_units: 96,
733            sub_group_sizes: vec![8, 16, 32],
734            preferred_sub_group_size: 16,
735            max_constant_buffer_size: 64 * 1024,
736            has_fp64: true,
737            has_fp16: true,
738            has_atomic64: true,
739            usm_device_allocations: true,
740            usm_host_allocations: true,
741            usm_shared_allocations: true,
742            usm_system_allocations: true,
743        })
744    }
745
746    /// Allocate memory
747    pub fn allocate(
748        &mut self,
749        size: usize,
750        memory_type: OneApiMemoryType,
751    ) -> Result<*mut c_void, OneApiError> {
752        let start_time = Instant::now();
753
754        let ptr = if self.config.enable_memory_pools {
755            if let Some(pool) = self.memory_pools.get_mut(&memory_type) {
756                pool.allocate(size)?
757            } else {
758                return Err(OneApiError::UnsupportedMemoryType(
759                    "Memory type not supported".to_string(),
760                ));
761            }
762        } else {
763            // Direct allocation
764            self.direct_allocate(size, memory_type.clone())?
765        };
766
767        // Update statistics
768        self.stats.total_allocations += 1;
769        self.stats.bytes_allocated += size as u64;
770
771        match memory_type {
772            OneApiMemoryType::Device => self.stats.device_memory_used += size,
773            OneApiMemoryType::Host => self.stats.host_memory_used += size,
774            OneApiMemoryType::Shared => self.stats.shared_memory_used += size,
775            _ => {}
776        }
777
778        if matches!(
779            memory_type,
780            OneApiMemoryType::Device | OneApiMemoryType::Host | OneApiMemoryType::Shared
781        ) {
782            self.stats.usm_allocations += 1;
783        }
784
785        let allocation_time = start_time.elapsed();
786        let total_time = self.stats.average_allocation_time.as_nanos() as u64
787            * (self.stats.total_allocations - 1)
788            + allocation_time.as_nanos() as u64;
789        self.stats.average_allocation_time =
790            Duration::from_nanos(total_time / self.stats.total_allocations);
791
792        let current_usage = self.stats.device_memory_used
793            + self.stats.host_memory_used
794            + self.stats.shared_memory_used;
795        if current_usage > self.stats.peak_memory_usage {
796            self.stats.peak_memory_usage = current_usage;
797        }
798
799        Ok(ptr)
800    }
801
802    fn direct_allocate(
803        &self,
804        size: usize,
805        memory_type: OneApiMemoryType,
806    ) -> Result<*mut c_void, OneApiError> {
807        // Simulate direct SYCL allocation
808        let alignment = 64; // Common alignment for Intel GPUs
809
810        match memory_type {
811            OneApiMemoryType::Device => {
812                // malloc_device
813                Ok(unsafe {
814                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
815                        size, alignment,
816                    )) as *mut c_void
817                })
818            }
819            OneApiMemoryType::Host => {
820                // malloc_host
821                Ok(unsafe {
822                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
823                        size, alignment,
824                    )) as *mut c_void
825                })
826            }
827            OneApiMemoryType::Shared => {
828                // malloc_shared
829                Ok(unsafe {
830                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
831                        size, alignment,
832                    )) as *mut c_void
833                })
834            }
835            OneApiMemoryType::System => {
836                // System malloc
837                Ok(unsafe {
838                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
839                        size, alignment,
840                    )) as *mut c_void
841                })
842            }
843            _ => Err(OneApiError::UnsupportedMemoryType(
844                "Unsupported memory type".to_string(),
845            )),
846        }
847    }
848
849    /// Free memory
850    pub fn free(
851        &mut self,
852        ptr: *mut c_void,
853        memory_type: OneApiMemoryType,
854    ) -> Result<(), OneApiError> {
855        if self.config.enable_memory_pools {
856            if let Some(pool) = self.memory_pools.get_mut(&memory_type) {
857                pool.free(ptr)?;
858            } else {
859                return Err(OneApiError::UnsupportedMemoryType(
860                    "Memory type not supported".to_string(),
861                ));
862            }
863        } else {
864            // Direct deallocation
865            unsafe {
866                std::alloc::dealloc(
867                    ptr as *mut u8,
868                    std::alloc::Layout::from_size_align_unchecked(1, 1),
869                );
870            }
871        }
872
873        self.stats.total_deallocations += 1;
874        Ok(())
875    }
876
877    /// USM memory copy
878    pub fn usm_memcpy(
879        &mut self,
880        dst: *mut c_void,
881        src: *const c_void,
882        size: usize,
883        queue_id: u32,
884    ) -> Result<(), OneApiError> {
885        let operation = SyclOperation {
886            op_type: SyclOperationType::UsmMemcpy,
887            src_ptr: Some(src as *mut c_void),
888            dst_ptr: Some(dst),
889            size,
890            timestamp: Instant::now(),
891            event_handle: None,
892        };
893
894        self.queue_manager.submit_operation(queue_id, operation)?;
895        self.stats.memory_transfers += 1;
896        Ok(())
897    }
898
899    /// Create SYCL context
900    pub fn create_context(&mut self) -> Result<u32, OneApiError> {
901        let context_id = self.contexts.len() as u32;
902
903        let context = SyclContext {
904            handle: std::ptr::null_mut(), // Would be actual SYCL context
905            device_id: self.config.device_id,
906            device_properties: self.device_properties.clone(),
907            created_at: Instant::now(),
908            queues: Vec::new(),
909            usm_allocations: HashMap::new(),
910        };
911
912        self.contexts.insert(context_id, context);
913        Ok(context_id)
914    }
915
916    /// Create queue
917    pub fn create_queue(
918        &mut self,
919        properties: Option<SyclQueueProperties>,
920    ) -> Result<u32, OneApiError> {
921        self.queue_manager.create_queue(properties)
922    }
923
924    /// Destroy queue
925    pub fn destroy_queue(&mut self, queue_id: u32) -> Result<(), OneApiError> {
926        self.queue_manager.destroy_queue(queue_id)
927    }
928
929    /// Wait for all queues
930    pub fn wait_all(&mut self) -> Result<(), OneApiError> {
931        let queue_ids: Vec<u32> = self.queue_manager.queues.iter().map(|q| q.id).collect();
932        for queue_id in queue_ids {
933            self.queue_manager.wait_for_queue(queue_id)?;
934        }
935        Ok(())
936    }
937
938    /// Get device properties
939    pub fn get_device_properties(&self) -> &SyclDeviceProperties {
940        &self.device_properties
941    }
942
943    /// Get statistics
944    pub fn get_stats(&self) -> &OneApiStats {
945        &self.stats
946    }
947
948    /// Query USM pointer information
949    pub fn query_usm_ptr(&self, ptr: *mut c_void) -> Result<UsmAllocation, OneApiError> {
950        // In a real implementation, this would query the actual USM pointer
951        // For now, return default information
952        Ok(UsmAllocation {
953            ptr,
954            size: 0, // Would need to track actual size
955            usm_kind: UsmKind::Device,
956            allocated_at: Instant::now(),
957            device_id: self.config.device_id,
958            alignment: 64,
959        })
960    }
961}
962
963// Safety: OneApiMemoryBackend manages OneAPI/SYCL GPU memory pointers via *mut c_void.
964// While raw pointers are not Send/Sync by default, it's safe to share across threads
965// when protected by Arc<Mutex<>> because:
966// 1. All pointers point to SYCL USM memory managed by the OneAPI runtime
967// 2. The Mutex provides exclusive access for all mutable operations
968// 3. No thread-local state is maintained
969unsafe impl Send for OneApiMemoryBackend {}
970unsafe impl Sync for OneApiMemoryBackend {}
971
/// Errors reported by the OneAPI backend.
///
/// Every variant carries a free-form detail string. `Display` renders an error
/// as `"<category>: <detail>"`, e.g. `Out of memory: <detail>`.
#[derive(Debug, Clone)]
pub enum OneApiError {
    /// Displayed as `Device not found: <detail>`.
    DeviceNotFound(String),
    /// Displayed as `Out of memory: <detail>`.
    OutOfMemory(String),
    /// Displayed as `Invalid pointer: <detail>`.
    InvalidPointer(String),
    /// Displayed as `Invalid queue: <detail>`.
    InvalidQueue(String),
    /// Displayed as `Queue full: <detail>`.
    QueueFull(String),
    /// Displayed as `Unsupported operation: <detail>`.
    UnsupportedOperation(String),
    /// Displayed as `Unsupported memory type: <detail>`.
    UnsupportedMemoryType(String),
    /// Displayed as `Context creation failed: <detail>`.
    ContextCreationFailed(String),
    /// Displayed as `Kernel submission failed: <detail>`.
    KernelSubmissionFailed(String),
    /// Displayed as `Synchronization failed: <detail>`.
    SynchronizationFailed(String),
    /// Displayed as `Internal error: <detail>`.
    InternalError(String),
}

impl std::fmt::Display for OneApiError {
    /// Writes the variant's category label, a colon and the detail string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label per variant once, then format in a single place.
        let (label, detail) = match self {
            Self::DeviceNotFound(detail) => ("Device not found", detail),
            Self::OutOfMemory(detail) => ("Out of memory", detail),
            Self::InvalidPointer(detail) => ("Invalid pointer", detail),
            Self::InvalidQueue(detail) => ("Invalid queue", detail),
            Self::QueueFull(detail) => ("Queue full", detail),
            Self::UnsupportedOperation(detail) => ("Unsupported operation", detail),
            Self::UnsupportedMemoryType(detail) => ("Unsupported memory type", detail),
            Self::ContextCreationFailed(detail) => ("Context creation failed", detail),
            Self::KernelSubmissionFailed(detail) => ("Kernel submission failed", detail),
            Self::SynchronizationFailed(detail) => ("Synchronization failed", detail),
            Self::InternalError(detail) => ("Internal error", detail),
        };
        write!(f, "{}: {}", label, detail)
    }
}

impl std::error::Error for OneApiError {}
1013
1014/// Thread-safe OneAPI backend wrapper
1015pub struct ThreadSafeOneApiBackend {
1016    backend: Arc<Mutex<OneApiMemoryBackend>>,
1017}
1018
1019impl ThreadSafeOneApiBackend {
1020    pub fn new(config: OneApiConfig) -> Result<Self, OneApiError> {
1021        let backend = OneApiMemoryBackend::new(config)?;
1022        Ok(Self {
1023            backend: Arc::new(Mutex::new(backend)),
1024        })
1025    }
1026
1027    pub fn allocate(
1028        &self,
1029        size: usize,
1030        memory_type: OneApiMemoryType,
1031    ) -> Result<*mut c_void, OneApiError> {
1032        let mut backend = self.backend.lock().unwrap();
1033        backend.allocate(size, memory_type)
1034    }
1035
1036    pub fn free(&self, ptr: *mut c_void, memory_type: OneApiMemoryType) -> Result<(), OneApiError> {
1037        let mut backend = self.backend.lock().unwrap();
1038        backend.free(ptr, memory_type)
1039    }
1040
1041    pub fn get_stats(&self) -> OneApiStats {
1042        let backend = self.backend.lock().unwrap();
1043        backend.get_stats().clone()
1044    }
1045}
1046
#[cfg(test)]
mod tests {
    use super::*;

    /// The backend can be constructed from the default configuration.
    #[test]
    fn test_oneapi_backend_creation() {
        assert!(OneApiMemoryBackend::new(OneApiConfig::default()).is_ok());
    }

    /// A device pool hands out a 1 KiB block and accepts it back.
    #[test]
    fn test_memory_pool() {
        let mut pool = OneApiMemoryPool::new(OneApiMemoryType::Device, 1024 * 1024);

        let allocation = pool.allocate(1024);
        assert!(allocation.is_ok());

        assert!(pool.free(allocation.unwrap()).is_ok());
    }

    /// A queue created with default properties can be destroyed again.
    #[test]
    fn test_sycl_queue_manager() {
        let mut manager = SyclQueueManager::new(SyclQueueConfig::default());

        let created = manager.create_queue(None);
        assert!(created.is_ok());

        assert!(manager.destroy_queue(created.unwrap()).is_ok());
    }

    /// A freshly wrapped backend reports zero allocations.
    #[test]
    fn test_thread_safe_backend() {
        let wrapped = ThreadSafeOneApiBackend::new(OneApiConfig::default());
        assert!(wrapped.is_ok());

        assert_eq!(wrapped.unwrap().get_stats().total_allocations, 0);
    }
}