// optirs_gpu/memory/vendors/metal_backend.rs

1// Metal backend for GPU memory management
2//
3// This module provides Apple Metal-specific memory management functionality,
4// including device memory allocation, Metal command buffers, and performance
5// optimization features specific to Apple Silicon GPUs.
6
7#[allow(dead_code)]
8use std::collections::HashMap;
9use std::ffi::c_void;
10use std::ptr::NonNull;
11use std::sync::{Arc, Mutex};
12use std::time::{Duration, Instant};
13
/// Metal memory backend implementation.
///
/// Central entry point for the simulated Metal memory subsystem: owns the
/// per-type memory pools, the command-queue manager, and cumulative usage
/// statistics for a single selected device.
pub struct MetalMemoryBackend {
    /// Backend configuration
    config: MetalConfig,
    /// Properties of the device selected by `config.device_id`
    device_properties: MetalDeviceProperties,
    /// Active Metal devices, keyed by device ID
    devices: HashMap<u32, MetalDevice>,
    /// Memory pools, one per enabled memory type
    memory_pools: HashMap<MetalMemoryType, MetalMemoryPool>,
    /// Allocation / command statistics
    stats: MetalStats,
    /// Command queue management
    command_manager: MetalCommandManager,
}
29
/// Metal backend configuration
#[derive(Debug, Clone)]
pub struct MetalConfig {
    /// Device ID to use
    pub device_id: u32,
    /// Enable private (GPU-only) memory
    pub enable_private_memory: bool,
    /// Enable shared (CPU+GPU visible) memory
    pub enable_shared_memory: bool,
    /// Enable managed (Metal-synchronized) memory
    pub enable_managed_memory: bool,
    /// Enable pooled allocation; when false every request hits the allocator directly
    pub enable_memory_pools: bool,
    /// Enable async memory operations
    pub enable_async_ops: bool,
    /// Memory pool growth increment in bytes
    pub pool_growth_size: usize,
    /// Enable memoryless render targets (tile memory)
    pub enable_memoryless_targets: bool,
    /// Maximum number of command queues
    pub max_command_queues: u32,
    /// Enable Metal Performance Shaders
    pub enable_mps: bool,
    /// Enable heap-based allocation
    pub enable_heap_allocation: bool,
}
56
57impl Default for MetalConfig {
58    fn default() -> Self {
59        Self {
60            device_id: 0,
61            enable_private_memory: true,
62            enable_shared_memory: true,
63            enable_managed_memory: true,
64            enable_memory_pools: true,
65            enable_async_ops: true,
66            pool_growth_size: 64 * 1024 * 1024, // 64MB
67            enable_memoryless_targets: false,
68            max_command_queues: 8,
69            enable_mps: true,
70            enable_heap_allocation: true,
71        }
72    }
73}
74
/// Metal device properties (snapshot from a simulated device query).
#[derive(Debug, Clone)]
pub struct MetalDeviceProperties {
    /// Device index this snapshot describes
    pub device_id: u32,
    /// Marketing name, e.g. "Apple M1 Pro"
    pub name: String,
    /// Integrated / discrete / external / virtual
    pub device_type: MetalDeviceType,
    /// GPU family (Apple Silicon generation or Mac family)
    pub family: MetalGPUFamily,
    /// Maximum threads per threadgroup
    pub max_threads_per_threadgroup: u32,
    /// Threadgroup (shared) memory size in bytes
    pub threadgroup_memory_length: u32,
    /// Largest single buffer length in bytes
    pub max_buffer_length: usize,
    /// Maximum 2D texture dimension
    pub max_texture_size_2d: u32,
    /// Maximum 3D texture dimension
    pub max_texture_size_3d: u32,
    /// Whether CPU and GPU share one physical memory pool
    pub unified_memory: bool,
    /// Whether the device has dedicated VRAM
    pub discrete_memory: bool,
    /// Low-power (battery-friendly) device
    pub low_power: bool,
    /// Device has no attached display
    pub headless: bool,
    /// Shader debugging support
    pub supports_shader_debugging: bool,
    /// GPU function pointer support
    pub supports_function_pointers: bool,
    /// Dynamic library support (compute)
    pub supports_dynamic_libraries: bool,
    /// Dynamic library support (render)
    pub supports_render_dynamic_libraries: bool,
    /// Recommended working-set ceiling in bytes
    pub recommended_max_working_set_size: usize,
    /// Peak memory transfer rate in bytes/sec
    pub max_transfer_rate: u64,
    // NOTE(review): duplicates `unified_memory` above — candidate for removal
    // once callers of both fields are audited.
    pub has_unified_memory: bool,
}
99
/// Metal device types
#[derive(Debug, Clone, PartialEq)]
pub enum MetalDeviceType {
    /// GPU on the SoC sharing system memory (Apple Silicon, Intel iGPU)
    Integrated,
    /// GPU with dedicated VRAM
    Discrete,
    /// eGPU attached externally (e.g. Thunderbolt)
    External,
    /// Virtualized device (VM guest)
    Virtual,
}
108
/// Metal GPU families (Apple Silicon generations)
///
/// Mirrors Metal's GPU-family grouping; later Apple families are feature
/// supersets of earlier ones.
#[derive(Debug, Clone, PartialEq)]
pub enum MetalGPUFamily {
    Apple1, // A7
    Apple2, // A8
    Apple3, // A9, A10
    Apple4, // A11
    Apple5, // A12, A13
    Apple6, // A14, M1
    Apple7, // A15, M1 Pro, M1 Max
    Apple8, // A16, M2
    Apple9, // M2 Pro, M2 Max, M3
    Mac1,   // Intel Iris Pro
    Mac2,   // Intel Iris Pro, AMD
}
124
/// Metal memory types
///
/// Derives `Eq + Hash` because this enum keys the backend's pool map.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum MetalMemoryType {
    Private,    // GPU-only memory
    Shared,     // CPU-GPU shared memory
    Managed,    // Automatically managed memory
    Memoryless, // Tile memory (iOS only)
}
133
/// Metal device abstraction
pub struct MetalDevice {
    /// Device handle (simulated; no real Metal object behind it)
    pub handle: *mut c_void,
    /// Device ID
    pub device_id: u32,
    /// Device properties
    pub properties: MetalDeviceProperties,
    /// Creation time
    pub created_at: Instant,
    /// Command queues owned by this device
    pub command_queues: Vec<MetalCommandQueue>,
    /// Memory heaps, keyed by heap ID
    pub heaps: HashMap<usize, MetalHeap>,
    /// Active resources, keyed by base pointer
    pub resources: HashMap<*mut c_void, MetalResource>,
}
151
/// Metal command queue for GPU operations
pub struct MetalCommandQueue {
    /// Queue handle (simulated)
    pub handle: *mut c_void,
    /// Manager-assigned queue ID
    pub id: u32,
    /// Optional human-readable label for debugging
    pub label: Option<String>,
    /// Creation time
    pub created_at: Instant,
    /// Command buffers in submission (FIFO) order
    pub command_buffers: std::collections::VecDeque<MetalCommandBuffer>,
    /// Queue priority
    pub priority: MetalQueuePriority,
}
167
/// Metal queue priorities, highest to lowest.
#[derive(Debug, Clone, PartialEq)]
pub enum MetalQueuePriority {
    /// Latency-sensitive work
    High,
    /// Default priority
    Normal,
    /// Deferrable work
    Low,
    /// Best-effort background work
    Background,
}
176
/// Metal command buffer
#[derive(Debug, Clone)]
pub struct MetalCommandBuffer {
    // Manager-assigned buffer ID
    pub buffer_id: u32,
    // Encoded commands, in submission order
    pub commands: Vec<MetalCommand>,
    // Creation time
    pub timestamp: Instant,
    // Set once the buffer has been committed for execution
    pub committed: bool,
    // Set once (simulated) execution has finished
    pub completed: bool,
}
186
/// Metal GPU commands
#[derive(Debug, Clone)]
pub enum MetalCommand {
    /// Buffer-to-buffer copy via the blit encoder
    BlitCommand {
        src_buffer: *mut c_void,
        dst_buffer: *mut c_void,
        size: usize,
    },
    /// Compute kernel dispatch
    ComputeCommand {
        kernel_id: u32,
        // Threads per threadgroup (x, y, z)
        threadgroup_size: (u32, u32, u32),
        // Threadgroups per grid (x, y, z)
        threadgroups: (u32, u32, u32),
    },
    /// Render pass execution
    RenderCommand {
        render_pass: u32,
    },
    /// Full memory barrier between commands
    MemoryBarrier,
}
205
/// Metal memory pool
///
/// Grows on demand up to `max_size`; freed blocks are kept on a free list
/// and reused (with splitting and coalescing) rather than returned to the
/// system allocator.
pub struct MetalMemoryPool {
    /// Memory type served by this pool
    memory_type: MetalMemoryType,
    /// Pool handle (simulated)
    handle: *mut c_void,
    /// Total bytes ever requested from the system for this pool
    current_size: usize,
    /// Hard cap on `current_size`
    max_size: usize,
    /// Bytes currently held by allocated blocks
    used_size: usize,
    /// Free blocks available for reuse
    free_blocks: std::collections::VecDeque<MetalMemoryBlock>,
    /// In-use blocks, keyed by base pointer
    allocated_blocks: HashMap<*mut c_void, MetalMemoryBlock>,
    /// Storage mode applied to every block in this pool
    storage_mode: MetalStorageMode,
    /// Cache mode applied to every block in this pool
    cache_mode: MetalCacheMode,
}
227
/// Metal memory block
#[derive(Debug, Clone)]
pub struct MetalMemoryBlock {
    // Base address of the block
    pub ptr: *mut c_void,
    // Block size in bytes
    pub size: usize,
    // Memory type the block was allocated from
    pub memory_type: MetalMemoryType,
    // When the backing memory was first allocated
    pub allocated_at: Instant,
    // Last time the block was handed out (None while on the free list)
    pub last_access: Option<Instant>,
    // Outstanding references (0 while on the free list)
    pub ref_count: u32,
    // Storage mode inherited from the pool
    pub storage_mode: MetalStorageMode,
    // Cache mode inherited from the pool
    pub cache_mode: MetalCacheMode,
    // Simulated GPU virtual address, if assigned
    pub gpu_address: Option<u64>,
}
241
/// Metal storage modes (where a resource's contents live).
#[derive(Debug, Clone, PartialEq)]
pub enum MetalStorageMode {
    Shared,     // CPU and GPU accessible
    Managed,    // Managed by Metal
    Private,    // GPU-only
    Memoryless, // Tile memory
}
250
/// Metal CPU cache modes for host-visible memory.
#[derive(Debug, Clone, PartialEq)]
pub enum MetalCacheMode {
    /// Normal cached access
    DefaultCache,
    /// Write-combined: fast CPU writes, slow CPU reads
    WriteCombined,
}
257
/// Metal heap for resource allocation
pub struct MetalHeap {
    /// Heap handle (simulated)
    pub handle: *mut c_void,
    /// Heap ID
    pub id: usize,
    /// Total heap size in bytes
    pub size: usize,
    /// Bytes currently allocated out of the heap
    pub used_size: usize,
    /// Storage mode shared by resources in this heap
    pub storage_mode: MetalStorageMode,
    /// CPU cache mode shared by resources in this heap
    pub cpu_cache_mode: MetalCacheMode,
    /// Allocated resources, keyed by base pointer
    pub resources: HashMap<*mut c_void, MetalResource>,
}
275
/// Metal resource (buffer, texture, etc.)
#[derive(Debug, Clone)]
pub struct MetalResource {
    // Resource base address
    pub ptr: *mut c_void,
    // Size in bytes
    pub size: usize,
    // Buffer or texture kind
    pub resource_type: MetalResourceType,
    // Storage mode
    pub storage_mode: MetalStorageMode,
    // Creation time
    pub allocated_at: Instant,
    // Offset within the owning heap, if heap-allocated
    pub heap_offset: Option<usize>,
}
286
/// Metal resource types
#[derive(Debug, Clone, PartialEq)]
pub enum MetalResourceType {
    /// Untyped linear buffer
    Buffer,
    /// 1D texture
    Texture1D,
    /// 2D texture
    Texture2D,
    /// 3D texture
    Texture3D,
    /// Cube-map texture
    TextureCube,
}
296
297impl MetalMemoryPool {
298    pub fn new(memory_type: MetalMemoryType, max_size: usize) -> Self {
299        let (storage_mode, cache_mode) = match memory_type {
300            MetalMemoryType::Private => (MetalStorageMode::Private, MetalCacheMode::DefaultCache),
301            MetalMemoryType::Shared => (MetalStorageMode::Shared, MetalCacheMode::DefaultCache),
302            MetalMemoryType::Managed => (MetalStorageMode::Managed, MetalCacheMode::DefaultCache),
303            MetalMemoryType::Memoryless => {
304                (MetalStorageMode::Memoryless, MetalCacheMode::DefaultCache)
305            }
306        };
307
308        Self {
309            memory_type,
310            handle: std::ptr::null_mut(),
311            current_size: 0,
312            max_size,
313            used_size: 0,
314            free_blocks: std::collections::VecDeque::new(),
315            allocated_blocks: HashMap::new(),
316            storage_mode,
317            cache_mode,
318        }
319    }
320
321    /// Allocate from pool
322    pub fn allocate(&mut self, size: usize) -> Result<*mut c_void, MetalError> {
323        // Try to find suitable free block
324        for i in 0..self.free_blocks.len() {
325            if self.free_blocks[i].size >= size {
326                let mut block = self.free_blocks.remove(i).unwrap();
327
328                // Split block if much larger
329                if block.size > size * 2 {
330                    let remaining_block = MetalMemoryBlock {
331                        ptr: unsafe { block.ptr.add(size) },
332                        size: block.size - size,
333                        memory_type: block.memory_type.clone(),
334                        allocated_at: block.allocated_at,
335                        last_access: None,
336                        ref_count: 0,
337                        storage_mode: block.storage_mode.clone(),
338                        cache_mode: block.cache_mode.clone(),
339                        gpu_address: None,
340                    };
341                    self.free_blocks.push_back(remaining_block);
342                    block.size = size;
343                }
344
345                block.last_access = Some(Instant::now());
346                block.ref_count = 1;
347
348                let ptr = block.ptr;
349                self.allocated_blocks.insert(ptr, block);
350                self.used_size += size;
351
352                return Ok(ptr);
353            }
354        }
355
356        // Need to allocate new memory
357        if self.current_size + size > self.max_size {
358            return Err(MetalError::OutOfMemory(
359                "Pool size limit exceeded".to_string(),
360            ));
361        }
362
363        let ptr = self.metal_allocate(size)?;
364        let block = MetalMemoryBlock {
365            ptr,
366            size,
367            memory_type: self.memory_type.clone(),
368            allocated_at: Instant::now(),
369            last_access: Some(Instant::now()),
370            ref_count: 1,
371            storage_mode: self.storage_mode.clone(),
372            cache_mode: self.cache_mode.clone(),
373            gpu_address: Some(ptr as u64), // Simulate GPU address
374        };
375
376        self.allocated_blocks.insert(ptr, block);
377        self.current_size += size;
378        self.used_size += size;
379
380        Ok(ptr)
381    }
382
383    /// Free back to pool
384    pub fn free(&mut self, ptr: *mut c_void) -> Result<(), MetalError> {
385        if let Some(block) = self.allocated_blocks.remove(&ptr) {
386            self.used_size -= block.size;
387
388            // Add to free blocks
389            self.free_blocks.push_back(MetalMemoryBlock {
390                ptr: block.ptr,
391                size: block.size,
392                memory_type: block.memory_type,
393                allocated_at: block.allocated_at,
394                last_access: None,
395                ref_count: 0,
396                storage_mode: block.storage_mode,
397                cache_mode: block.cache_mode,
398                gpu_address: block.gpu_address,
399            });
400
401            // Try to coalesce adjacent blocks
402            self.coalesce_free_blocks();
403
404            Ok(())
405        } else {
406            Err(MetalError::InvalidPointer(
407                "Pointer not found in pool".to_string(),
408            ))
409        }
410    }
411
412    fn coalesce_free_blocks(&mut self) {
413        // Sort free blocks by address
414        let mut blocks: Vec<MetalMemoryBlock> = self.free_blocks.drain(..).collect();
415        blocks.sort_by_key(|block| block.ptr as usize);
416
417        let mut coalesced = Vec::new();
418        let mut current_block: Option<MetalMemoryBlock> = None;
419
420        for block in blocks {
421            match current_block.take() {
422                None => current_block = Some(block),
423                Some(mut prev_block) => {
424                    let prev_end = prev_block.ptr as usize + prev_block.size;
425                    let block_start = block.ptr as usize;
426
427                    if prev_end == block_start && prev_block.memory_type == block.memory_type {
428                        // Coalesce blocks
429                        prev_block.size += block.size;
430                        current_block = Some(prev_block);
431                    } else {
432                        coalesced.push(prev_block);
433                        current_block = Some(block);
434                    }
435                }
436            }
437        }
438
439        if let Some(block) = current_block {
440            coalesced.push(block);
441        }
442
443        self.free_blocks = coalesced.into();
444    }
445
446    fn metal_allocate(&self, size: usize) -> Result<*mut c_void, MetalError> {
447        // Simulate Metal buffer allocation
448        let alignment = match self.memory_type {
449            MetalMemoryType::Private => 64,    // GPU alignment
450            MetalMemoryType::Shared => 16,     // CPU-GPU shared
451            MetalMemoryType::Managed => 16,    // Managed memory
452            MetalMemoryType::Memoryless => 64, // Tile memory
453        };
454
455        match self.memory_type {
456            MetalMemoryType::Private => {
457                // MTLBuffer with private storage
458                Ok(unsafe {
459                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
460                        size, alignment,
461                    )) as *mut c_void
462                })
463            }
464            MetalMemoryType::Shared => {
465                // MTLBuffer with shared storage
466                Ok(unsafe {
467                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
468                        size, alignment,
469                    )) as *mut c_void
470                })
471            }
472            MetalMemoryType::Managed => {
473                // MTLBuffer with managed storage
474                Ok(unsafe {
475                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
476                        size, alignment,
477                    )) as *mut c_void
478                })
479            }
480            MetalMemoryType::Memoryless => {
481                // Memoryless render target (tile memory)
482                if size > 8 * 1024 * 1024 {
483                    // 8MB tile memory limit
484                    return Err(MetalError::UnsupportedOperation(
485                        "Memoryless allocation too large".to_string(),
486                    ));
487                }
488                Ok(unsafe {
489                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
490                        size, alignment,
491                    )) as *mut c_void
492                })
493            }
494        }
495    }
496}
497
/// Metal command manager
///
/// Owns every command queue and hands out monotonically increasing queue
/// and buffer IDs.
pub struct MetalCommandManager {
    /// Command queues, in creation order
    queues: Vec<MetalCommandQueue>,
    /// Next queue ID to assign
    next_queue_id: u32,
    /// Next command buffer ID to assign
    next_buffer_id: u32,
    /// Configuration
    config: MetalCommandConfig,
}
509
/// Command manager configuration
#[derive(Debug, Clone)]
pub struct MetalCommandConfig {
    // Cap on in-flight command buffers per queue
    pub max_command_buffers_per_queue: usize,
    // Reuse command buffer objects after completion
    pub enable_command_buffer_reuse: bool,
    // Allow encoding into multiple buffers in parallel
    pub enable_parallel_encoding: bool,
}
517
518impl Default for MetalCommandConfig {
519    fn default() -> Self {
520        Self {
521            max_command_buffers_per_queue: 64,
522            enable_command_buffer_reuse: true,
523            enable_parallel_encoding: true,
524        }
525    }
526}
527
528impl MetalCommandManager {
529    pub fn new(config: MetalCommandConfig) -> Self {
530        Self {
531            queues: Vec::new(),
532            next_queue_id: 0,
533            next_buffer_id: 0,
534            config,
535        }
536    }
537
538    /// Create command queue
539    pub fn create_command_queue(
540        &mut self,
541        label: Option<String>,
542        priority: MetalQueuePriority,
543    ) -> Result<u32, MetalError> {
544        let queue_id = self.next_queue_id;
545        self.next_queue_id += 1;
546
547        let queue = MetalCommandQueue {
548            handle: std::ptr::null_mut(),
549            id: queue_id,
550            label,
551            created_at: Instant::now(),
552            command_buffers: std::collections::VecDeque::new(),
553            priority,
554        };
555
556        self.queues.push(queue);
557        Ok(queue_id)
558    }
559
560    /// Create command buffer
561    pub fn create_command_buffer(&mut self, queue_id: u32) -> Result<u32, MetalError> {
562        if let Some(queue) = self.queues.iter_mut().find(|q| q.id == queue_id) {
563            if queue.command_buffers.len() >= self.config.max_command_buffers_per_queue {
564                return Err(MetalError::QueueFull("Command queue is full".to_string()));
565            }
566
567            let buffer_id = self.next_buffer_id;
568            self.next_buffer_id += 1;
569
570            let command_buffer = MetalCommandBuffer {
571                buffer_id,
572                commands: Vec::new(),
573                timestamp: Instant::now(),
574                committed: false,
575                completed: false,
576            };
577
578            queue.command_buffers.push_back(command_buffer);
579            Ok(buffer_id)
580        } else {
581            Err(MetalError::InvalidQueue("Queue not found".to_string()))
582        }
583    }
584
585    /// Add command to buffer
586    pub fn add_command(
587        &mut self,
588        queue_id: u32,
589        buffer_id: u32,
590        command: MetalCommand,
591    ) -> Result<(), MetalError> {
592        if let Some(queue) = self.queues.iter_mut().find(|q| q.id == queue_id) {
593            if let Some(buffer) = queue
594                .command_buffers
595                .iter_mut()
596                .find(|b| b.buffer_id == buffer_id)
597            {
598                if buffer.committed {
599                    return Err(MetalError::InvalidOperation(
600                        "Command buffer already committed".to_string(),
601                    ));
602                }
603                buffer.commands.push(command);
604                Ok(())
605            } else {
606                Err(MetalError::InvalidCommandBuffer(
607                    "Command buffer not found".to_string(),
608                ))
609            }
610        } else {
611            Err(MetalError::InvalidQueue("Queue not found".to_string()))
612        }
613    }
614
615    /// Commit command buffer
616    pub fn commit_command_buffer(
617        &mut self,
618        queue_id: u32,
619        buffer_id: u32,
620    ) -> Result<(), MetalError> {
621        if let Some(queue) = self.queues.iter_mut().find(|q| q.id == queue_id) {
622            if let Some(buffer) = queue
623                .command_buffers
624                .iter_mut()
625                .find(|b| b.buffer_id == buffer_id)
626            {
627                buffer.committed = true;
628                // Simulate command execution
629                std::thread::sleep(Duration::from_micros(50));
630                buffer.completed = true;
631                Ok(())
632            } else {
633                Err(MetalError::InvalidCommandBuffer(
634                    "Command buffer not found".to_string(),
635                ))
636            }
637        } else {
638            Err(MetalError::InvalidQueue("Queue not found".to_string()))
639        }
640    }
641
642    /// Wait for completion
643    pub fn wait_until_completed(
644        &mut self,
645        queue_id: u32,
646        buffer_id: u32,
647    ) -> Result<(), MetalError> {
648        if let Some(queue) = self.queues.iter().find(|q| q.id == queue_id) {
649            if let Some(buffer) = queue
650                .command_buffers
651                .iter()
652                .find(|b| b.buffer_id == buffer_id)
653            {
654                if buffer.completed {
655                    Ok(())
656                } else {
657                    // In a real implementation, this would poll the Metal API
658                    // For now, assume completion after a short delay
659                    std::thread::sleep(Duration::from_micros(50));
660                    Ok(())
661                }
662            } else {
663                Err(MetalError::InvalidCommandBuffer(
664                    "Command buffer not found".to_string(),
665                ))
666            }
667        } else {
668            Err(MetalError::InvalidQueue("Queue not found".to_string()))
669        }
670    }
671}
672
/// Metal statistics
///
/// All counters are cumulative since backend creation.
#[derive(Debug, Clone, Default)]
pub struct MetalStats {
    // Successful allocations
    pub total_allocations: u64,
    // Frees
    pub total_deallocations: u64,
    // Total bytes ever allocated
    pub bytes_allocated: u64,
    // Total bytes ever freed
    pub bytes_deallocated: u64,
    // Bytes currently attributed to private memory
    pub private_memory_used: usize,
    // Bytes currently attributed to shared memory
    pub shared_memory_used: usize,
    // Bytes currently attributed to managed memory
    pub managed_memory_used: usize,
    // Command buffers created
    pub command_buffers_created: u64,
    // Command buffers that finished execution
    pub command_buffers_completed: u64,
    // Compute dispatches issued
    pub compute_commands: u64,
    // Blit (copy) commands issued
    pub blit_commands: u64,
    // Render commands issued
    pub render_commands: u64,
    // Running mean time per allocation
    pub average_allocation_time: Duration,
    // High-water mark of combined tracked memory usage
    pub peak_memory_usage: usize,
}
691
692impl MetalMemoryBackend {
693    /// Create new Metal backend
694    pub fn new(config: MetalConfig) -> Result<Self, MetalError> {
695        // Query Metal device
696        let device_properties = Self::query_device_properties(config.device_id)?;
697
698        // Create memory pools
699        let mut memory_pools = HashMap::new();
700        if config.enable_memory_pools {
701            let pool_size = device_properties.recommended_max_working_set_size / 4;
702
703            if config.enable_private_memory {
704                memory_pools.insert(
705                    MetalMemoryType::Private,
706                    MetalMemoryPool::new(MetalMemoryType::Private, pool_size),
707                );
708            }
709
710            if config.enable_shared_memory {
711                memory_pools.insert(
712                    MetalMemoryType::Shared,
713                    MetalMemoryPool::new(MetalMemoryType::Shared, pool_size),
714                );
715            }
716
717            if config.enable_managed_memory {
718                memory_pools.insert(
719                    MetalMemoryType::Managed,
720                    MetalMemoryPool::new(MetalMemoryType::Managed, pool_size),
721                );
722            }
723        }
724
725        let command_manager = MetalCommandManager::new(MetalCommandConfig::default());
726
727        Ok(Self {
728            config,
729            device_properties,
730            devices: HashMap::new(),
731            memory_pools,
732            stats: MetalStats::default(),
733            command_manager,
734        })
735    }
736
737    /// Query device properties
738    fn query_device_properties(device_id: u32) -> Result<MetalDeviceProperties, MetalError> {
739        // Simulate querying Metal device properties
740        Ok(MetalDeviceProperties {
741            device_id,
742            name: "Apple M1 Pro".to_string(),
743            device_type: MetalDeviceType::Integrated,
744            family: MetalGPUFamily::Apple7,
745            max_threads_per_threadgroup: 1024,
746            threadgroup_memory_length: 32768,
747            max_buffer_length: 2 * 1024 * 1024 * 1024, // 2GB
748            max_texture_size_2d: 16384,
749            max_texture_size_3d: 2048,
750            unified_memory: true,
751            discrete_memory: false,
752            low_power: false,
753            headless: false,
754            supports_shader_debugging: true,
755            supports_function_pointers: true,
756            supports_dynamic_libraries: true,
757            supports_render_dynamic_libraries: true,
758            recommended_max_working_set_size: 32 * 1024 * 1024 * 1024, // 32GB
759            max_transfer_rate: 400_000_000_000,                        // 400 GB/s
760            has_unified_memory: true,
761        })
762    }
763
764    /// Allocate memory
765    pub fn allocate(
766        &mut self,
767        size: usize,
768        memory_type: MetalMemoryType,
769    ) -> Result<*mut c_void, MetalError> {
770        let start_time = Instant::now();
771
772        let ptr = if self.config.enable_memory_pools {
773            if let Some(pool) = self.memory_pools.get_mut(&memory_type) {
774                pool.allocate(size)?
775            } else {
776                return Err(MetalError::UnsupportedMemoryType(
777                    "Memory type not supported".to_string(),
778                ));
779            }
780        } else {
781            // Direct allocation
782            self.direct_allocate(size, memory_type.clone())?
783        };
784
785        // Update statistics
786        self.stats.total_allocations += 1;
787        self.stats.bytes_allocated += size as u64;
788
789        match memory_type {
790            MetalMemoryType::Private => self.stats.private_memory_used += size,
791            MetalMemoryType::Shared => self.stats.shared_memory_used += size,
792            MetalMemoryType::Managed => self.stats.managed_memory_used += size,
793            _ => {}
794        }
795
796        let allocation_time = start_time.elapsed();
797        let total_time = self.stats.average_allocation_time.as_nanos() as u64
798            * (self.stats.total_allocations - 1)
799            + allocation_time.as_nanos() as u64;
800        self.stats.average_allocation_time =
801            Duration::from_nanos(total_time / self.stats.total_allocations);
802
803        let current_usage = self.stats.private_memory_used
804            + self.stats.shared_memory_used
805            + self.stats.managed_memory_used;
806        if current_usage > self.stats.peak_memory_usage {
807            self.stats.peak_memory_usage = current_usage;
808        }
809
810        Ok(ptr)
811    }
812
813    fn direct_allocate(
814        &self,
815        size: usize,
816        memory_type: MetalMemoryType,
817    ) -> Result<*mut c_void, MetalError> {
818        let alignment = match memory_type {
819            MetalMemoryType::Private => 64,
820            MetalMemoryType::Shared => 16,
821            MetalMemoryType::Managed => 16,
822            MetalMemoryType::Memoryless => 64,
823        };
824
825        // Simulate Metal buffer allocation
826        match memory_type {
827            MetalMemoryType::Private => Ok(unsafe {
828                std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
829                    size, alignment,
830                )) as *mut c_void
831            }),
832            MetalMemoryType::Shared => Ok(unsafe {
833                std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
834                    size, alignment,
835                )) as *mut c_void
836            }),
837            MetalMemoryType::Managed => Ok(unsafe {
838                std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
839                    size, alignment,
840                )) as *mut c_void
841            }),
842            MetalMemoryType::Memoryless => {
843                if size > 8 * 1024 * 1024 {
844                    return Err(MetalError::UnsupportedOperation(
845                        "Memoryless allocation too large".to_string(),
846                    ));
847                }
848                Ok(unsafe {
849                    std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
850                        size, alignment,
851                    )) as *mut c_void
852                })
853            }
854        }
855    }
856
857    /// Free memory
858    pub fn free(
859        &mut self,
860        ptr: *mut c_void,
861        memory_type: MetalMemoryType,
862    ) -> Result<(), MetalError> {
863        if self.config.enable_memory_pools {
864            if let Some(pool) = self.memory_pools.get_mut(&memory_type) {
865                pool.free(ptr)?;
866            } else {
867                return Err(MetalError::UnsupportedMemoryType(
868                    "Memory type not supported".to_string(),
869                ));
870            }
871        } else {
872            // Direct deallocation
873            unsafe {
874                std::alloc::dealloc(
875                    ptr as *mut u8,
876                    std::alloc::Layout::from_size_align_unchecked(1, 1),
877                );
878            }
879        }
880
881        self.stats.total_deallocations += 1;
882        Ok(())
883    }
884
885    /// Copy memory using Metal blit encoder
886    pub fn blit_copy(
887        &mut self,
888        src: *const c_void,
889        dst: *mut c_void,
890        size: usize,
891        queue_id: u32,
892    ) -> Result<(), MetalError> {
893        let buffer_id = self.command_manager.create_command_buffer(queue_id)?;
894        let command = MetalCommand::BlitCommand {
895            src_buffer: src as *mut c_void,
896            dst_buffer: dst,
897            size,
898        };
899
900        self.command_manager
901            .add_command(queue_id, buffer_id, command)?;
902        self.command_manager
903            .commit_command_buffer(queue_id, buffer_id)?;
904        self.command_manager
905            .wait_until_completed(queue_id, buffer_id)?;
906
907        self.stats.blit_commands += 1;
908        Ok(())
909    }
910
911    /// Create command queue
912    pub fn create_command_queue(
913        &mut self,
914        label: Option<String>,
915        priority: MetalQueuePriority,
916    ) -> Result<u32, MetalError> {
917        self.command_manager.create_command_queue(label, priority)
918    }
919
920    /// Get device properties
921    pub fn get_device_properties(&self) -> &MetalDeviceProperties {
922        &self.device_properties
923    }
924
925    /// Get statistics
926    pub fn get_stats(&self) -> &MetalStats {
927        &self.stats
928    }
929
    /// Wait for all operations to complete
    ///
    /// Scans every command buffer on every queue and pauses briefly for each
    /// one that has been committed but not yet marked complete.
    ///
    /// NOTE(review): this sleeps a fixed 100 µs once per busy buffer and then
    /// returns `Ok` regardless of whether the buffer actually finished. It
    /// never re-checks `completed`, and because we hold `&mut self` no other
    /// thread can flip the flag during the scan — so this is effectively a
    /// fixed delay, not a wait. Confirm the intended semantics; a real wait
    /// would likely delegate to `wait_until_completed` per buffer.
    pub fn wait_until_idle(&mut self) -> Result<(), MetalError> {
        // Wait for all command buffers to complete
        for queue in &self.command_manager.queues {
            for buffer in &queue.command_buffers {
                if buffer.committed && !buffer.completed {
                    std::thread::sleep(Duration::from_micros(100));
                }
            }
        }
        Ok(())
    }
942}
943
// SAFETY: MetalMemoryBackend manages Metal GPU memory pointers via *mut c_void.
// While raw pointers are not Send/Sync by default, it's safe to share across threads
// when protected by Arc<Mutex<>> because:
// 1. All pointers point to Metal GPU memory managed by the Metal framework
// 2. The Mutex provides exclusive access for all mutable operations
// 3. No thread-local state is maintained
unsafe impl Send for MetalMemoryBackend {}
// SAFETY: same rationale as Send — shared (&self) access is expected to go
// through an external Mutex (see ThreadSafeMetalBackend), and the type keeps
// no thread-local state.
unsafe impl Sync for MetalMemoryBackend {}
952
/// Metal errors
///
/// Every variant carries a human-readable message describing the failure;
/// `Display` renders it as `"<category>: <message>"`.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare errors directly
/// instead of matching on `Debug` output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MetalError {
    /// A requested Metal device could not be found.
    DeviceNotFound(String),
    /// The device or pool could not satisfy the requested allocation size.
    OutOfMemory(String),
    /// A pointer passed to the backend was not recognized.
    InvalidPointer(String),
    /// The referenced command queue does not exist.
    InvalidQueue(String),
    /// The referenced command buffer does not exist or is in the wrong state.
    InvalidCommandBuffer(String),
    /// The command queue cannot accept more work.
    QueueFull(String),
    /// The requested operation is invalid in the current state.
    InvalidOperation(String),
    /// The operation is not supported by this backend/device.
    UnsupportedOperation(String),
    /// The requested memory type is not supported or not enabled.
    UnsupportedMemoryType(String),
    /// The allocation attempt failed for a reason other than exhaustion.
    AllocationFailed(String),
    /// An unexpected internal failure (e.g. broken invariant).
    InternalError(String),
}
968
969impl std::fmt::Display for MetalError {
970    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
971        match self {
972            MetalError::DeviceNotFound(msg) => write!(f, "Device not found: {}", msg),
973            MetalError::OutOfMemory(msg) => write!(f, "Out of memory: {}", msg),
974            MetalError::InvalidPointer(msg) => write!(f, "Invalid pointer: {}", msg),
975            MetalError::InvalidQueue(msg) => write!(f, "Invalid queue: {}", msg),
976            MetalError::InvalidCommandBuffer(msg) => write!(f, "Invalid command buffer: {}", msg),
977            MetalError::QueueFull(msg) => write!(f, "Queue full: {}", msg),
978            MetalError::InvalidOperation(msg) => write!(f, "Invalid operation: {}", msg),
979            MetalError::UnsupportedOperation(msg) => write!(f, "Unsupported operation: {}", msg),
980            MetalError::UnsupportedMemoryType(msg) => write!(f, "Unsupported memory type: {}", msg),
981            MetalError::AllocationFailed(msg) => write!(f, "Allocation failed: {}", msg),
982            MetalError::InternalError(msg) => write!(f, "Internal error: {}", msg),
983        }
984    }
985}
986
// Marker impl: the default `source()` (None) is sufficient; `Display` and
// `Debug` above provide the message.
impl std::error::Error for MetalError {}
988
/// Thread-safe Metal backend wrapper
///
/// Wraps a `MetalMemoryBackend` in `Arc<Mutex<..>>` so it can be shared
/// across threads; every public method locks the mutex for the duration of
/// the call.
pub struct ThreadSafeMetalBackend {
    /// The shared backend; all access goes through this mutex.
    backend: Arc<Mutex<MetalMemoryBackend>>,
}
993
994impl ThreadSafeMetalBackend {
995    pub fn new(config: MetalConfig) -> Result<Self, MetalError> {
996        let backend = MetalMemoryBackend::new(config)?;
997        Ok(Self {
998            backend: Arc::new(Mutex::new(backend)),
999        })
1000    }
1001
1002    pub fn allocate(
1003        &self,
1004        size: usize,
1005        memory_type: MetalMemoryType,
1006    ) -> Result<*mut c_void, MetalError> {
1007        let mut backend = self.backend.lock().unwrap();
1008        backend.allocate(size, memory_type)
1009    }
1010
1011    pub fn free(&self, ptr: *mut c_void, memory_type: MetalMemoryType) -> Result<(), MetalError> {
1012        let mut backend = self.backend.lock().unwrap();
1013        backend.free(ptr, memory_type)
1014    }
1015
1016    pub fn get_stats(&self) -> MetalStats {
1017        let backend = self.backend.lock().unwrap();
1018        backend.get_stats().clone()
1019    }
1020}
1021
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructing a backend from the default config should succeed.
    #[test]
    fn test_metal_backend_creation() {
        let result = MetalMemoryBackend::new(MetalConfig::default());
        assert!(result.is_ok());
    }

    /// An allocation from a private-memory pool can be freed again.
    #[test]
    fn test_memory_pool() {
        let mut pool = MetalMemoryPool::new(MetalMemoryType::Private, 1024 * 1024);

        let allocation = pool.allocate(1024);
        assert!(allocation.is_ok());
        assert!(pool.free(allocation.unwrap()).is_ok());
    }

    /// A queue can be created and a command buffer opened on it.
    #[test]
    fn test_command_manager() {
        let mut manager = MetalCommandManager::new(MetalCommandConfig::default());

        let queue =
            manager.create_command_queue(Some("test".to_string()), MetalQueuePriority::Normal);
        assert!(queue.is_ok());
        assert!(manager.create_command_buffer(queue.unwrap()).is_ok());
    }

    /// The thread-safe wrapper starts with zeroed statistics.
    #[test]
    fn test_thread_safe_backend() {
        let wrapper = ThreadSafeMetalBackend::new(MetalConfig::default());
        assert!(wrapper.is_ok());
        assert_eq!(wrapper.unwrap().get_stats().total_allocations, 0);
    }
}