// wgpu-hal Metal backend — wgpu_hal/metal/mod.rs
1/*!
2# Metal API internals.
3
4## Pipeline Layout
5
6In Metal, immediates, vertex buffers, and resources in the bind groups
7are all placed together in the native resource bindings, which work similarly to D3D11:
8there are tables of textures, buffers, and samplers.
9
10We put immediates first (if any) in the table, followed by bind group 0
11resources, followed by other bind groups. The vertex buffers are bound at the very
12end of the VS buffer table.
13
14!*/
15
16// `MTLFeatureSet` is superseded by `MTLGpuFamily`.
17// However, `MTLGpuFamily` is only supported starting MacOS 10.15, whereas our minimum target is MacOS 10.13,
18// See https://github.com/gpuweb/gpuweb/issues/1069 for minimum spec.
19// TODO: Eventually all deprecated features should be abstracted and use new api when available.
20#[allow(deprecated)]
21mod adapter;
22mod command;
23mod conv;
24mod device;
25mod library_from_metallib;
26mod surface;
27mod time;
28
29use alloc::{
30    string::{String, ToString as _},
31    sync::Arc,
32    vec::Vec,
33};
34use core::{fmt, iter, ops, ptr::NonNull, sync::atomic};
35
36use bitflags::bitflags;
37use hashbrown::HashMap;
38use naga::FastHashMap;
39use objc2::{
40    available,
41    rc::{autoreleasepool, Retained},
42    runtime::ProtocolObject,
43};
44use objc2_foundation::ns_string;
45use objc2_metal::{
46    MTLAccelerationStructure, MTLAccelerationStructureCommandEncoder, MTLArgumentBuffersTier,
47    MTLBlitCommandEncoder, MTLBuffer, MTLCommandBuffer, MTLCommandBufferStatus, MTLCommandQueue,
48    MTLComputeCommandEncoder, MTLComputePipelineState, MTLCounterSampleBuffer, MTLCullMode,
49    MTLDepthClipMode, MTLDepthStencilState, MTLDevice, MTLDrawable, MTLIndexType,
50    MTLLanguageVersion, MTLLibrary, MTLPrimitiveType, MTLReadWriteTextureTier,
51    MTLRenderCommandEncoder, MTLRenderPipelineState, MTLRenderStages, MTLResource,
52    MTLResourceUsage, MTLSamplerState, MTLSharedEvent, MTLSize, MTLTexture, MTLTextureType,
53    MTLTriangleFillMode, MTLWinding,
54};
55use objc2_quartz_core::CAMetalLayer;
56use parking_lot::{Mutex, RwLock};
57
/// Marker type implementing [`crate::Api`] for the Metal backend.
#[derive(Clone, Debug)]
pub struct Api;

/// Index into one of Metal's per-stage argument tables
/// (buffer, texture, or sampler slots).
type ResourceIndex = u32;

// Wires up all the backend's concrete resource types.
impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Metal;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
    type PipelineCache = PipelineCache;

    type AccelerationStructure = AccelerationStructure;
}

// Implement the object-safe `Dyn*` resource traits for every Metal resource type.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
118
/// Provides availability information about Mac APIs.
///
/// This may include Metal features that depend only on software support.
/// Features with varying hardware support are in [`CapabilitiesQuery`]
///
/// When feature detection is only needed once, it may also be done inline.
struct OsFeatures;

impl OsFeatures {
    /// Whether `CAMetalLayer.displaySyncEnabled` is available on this OS.
    fn display_sync() -> bool {
        // https://developer.apple.com/documentation/quartzcore/cametallayer/displaysyncenabled
        // NOTE(review): the `target_abi = "macabi"` arm presumably covers Mac
        // Catalyst builds, which `available!(macos = ...)` does not — confirm.
        available!(macos = 10.13) || cfg!(target_abi = "macabi")
    }
}

/// Entry point of the Metal backend; carries no state of its own.
pub struct Instance {}

impl Instance {
    /// Creates a [`Surface`] that renders into an existing `CAMetalLayer`.
    pub fn create_surface_from_layer(&self, layer: &CAMetalLayer) -> Surface {
        Surface::from_layer(layer)
    }
}
141
impl crate::Instance for Instance {
    type A = Api;

    unsafe fn init(_desc: &crate::InstanceDescriptor<'_>) -> Result<Self, crate::InstanceError> {
        profiling::scope!("Init Metal Backend");
        // We do not enable metal validation based on the validation flags as it affects the entire
        // process. Instead, we enable the validation inside the test harness itself in tests/src/native.rs.
        Ok(Instance {})
    }

    /// Wraps a platform window in a `CAMetalLayer`-backed [`Surface`].
    ///
    /// Only AppKit (macOS) and UIKit window handles are Metal-compatible;
    /// any other handle combination is rejected with an [`crate::InstanceError`].
    unsafe fn create_surface(
        &self,
        display_handle: raw_window_handle::RawDisplayHandle,
        window_handle: raw_window_handle::RawWindowHandle,
    ) -> Result<Surface, crate::InstanceError> {
        let layer = match (display_handle, window_handle) {
            (
                raw_window_handle::RawDisplayHandle::AppKit(_),
                raw_window_handle::RawWindowHandle::AppKit(handle),
            ) => unsafe { raw_window_metal::Layer::from_ns_view(handle.ns_view) },
            (
                raw_window_handle::RawDisplayHandle::UiKit(_),
                raw_window_handle::RawWindowHandle::UiKit(handle),
            ) => unsafe { raw_window_metal::Layer::from_ui_view(handle.ui_view) },
            _ => {
                return Err(crate::InstanceError::new(format!(
                    "window handle {window_handle:?} is not a Metal-compatible handle"
                )))
            }
        };

        // SAFETY: The layer is an initialized instance of `CAMetalLayer`, and
        // we transfer the retain count to `Retained` using `into_raw`.
        let layer = unsafe {
            Retained::from_raw(layer.into_raw().cast::<CAMetalLayer>().as_ptr()).unwrap()
        };

        Ok(Surface::new(layer))
    }

    unsafe fn enumerate_adapters(
        &self,
        _surface_hint: Option<&Surface>,
    ) -> Vec<crate::ExposedAdapter<Api>> {
        let devices = objc2_metal::MTLCopyAllDevices();
        let mut adapters: Vec<crate::ExposedAdapter<Api>> =
            devices.into_iter().map(AdapterShared::expose).collect();
        // Ascending sort on `(low_power, headless)`: since `false < true`, this
        // lists non-low-power, non-headless adapters first.
        adapters.sort_by_key(|ad| {
            (
                ad.adapter.shared.private_caps.low_power,
                ad.adapter.shared.private_caps.headless,
            )
        });
        adapters
    }
}
198
bitflags!(
    /// Similar to `MTLCounterSamplingPoint`, but a bit higher abstracted for our purposes.
    ///
    /// Note that every `ON_*` flag includes the `STAGE_BOUNDARIES` bit, so a
    /// `contains(ON_…)` check also implies stage-boundary support.
    #[derive(Debug, Copy, Clone)]
    pub struct TimestampQuerySupport: u32 {
        /// On creating Metal encoders.
        const STAGE_BOUNDARIES = 1 << 1;
        /// Within existing draw encoders.
        const ON_RENDER_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 2);
        /// Within existing dispatch encoders.
        const ON_COMPUTE_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 3);
        /// Within existing blit encoders.
        const ON_BLIT_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 4);

        /// Within any wgpu render/compute pass.
        /// (Blit encoders are intentionally not part of this combination.)
        const INSIDE_WGPU_PASSES = Self::ON_RENDER_ENCODER.bits() | Self::ON_COMPUTE_ENCODER.bits();
    }
);
216
/// Raw feature, limit, and pixel-format queries for a single Metal device.
///
/// Built once per adapter (see [`AdapterShared::expose`]) and then distilled
/// into [`PrivateCapabilities`] and [`PrivateTextureFormatCapabilities`].
// NOTE(review): `dead_code` is presumably needed because some queried fields
// are only read on certain platforms/configurations — confirm.
#[allow(dead_code)]
struct CapabilitiesQuery {
    msl_version: MTLLanguageVersion,
    fragment_rw_storage: bool,
    read_write_texture_tier: MTLReadWriteTextureTier,
    // MSAA support tiers.
    msaa_desktop: bool,
    msaa_apple3: bool,
    msaa_apple7: bool,
    resource_heaps: bool,
    argument_buffers: Option<MTLArgumentBuffersTier>,
    mutable_comparison_samplers: bool,
    sampler_clamp_to_border: bool,
    indirect_draw_dispatch: bool,
    base_vertex_first_instance_drawing: bool,
    dual_source_blending: bool,
    low_power: bool,
    headless: bool,
    layered_rendering: bool,
    function_specialization: bool,
    depth_clip_mode: bool,
    texture_cube_array: bool,
    supports_float_filtering: bool,
    // Per-pixel-format capability flags (`_all` = full support,
    // `_no_write` / `_no_filter` / … = support with the named restriction).
    format_depth24_stencil8: bool,
    format_depth32_stencil8_filter: bool,
    format_depth32_stencil8_none: bool,
    format_min_srgb_channels: u8,
    format_b5: bool,
    format_bc: bool,
    format_eac_etc: bool,
    format_astc: bool,
    format_astc_hdr: bool,
    format_astc_3d: bool,
    format_any8_unorm_srgb_all: bool,
    format_any8_unorm_srgb_no_write: bool,
    format_any8_snorm_all: bool,
    format_r16_norm_all: bool,
    format_r32_all: bool,
    format_r32_no_write: bool,
    format_r32float_no_write_no_filter: bool,
    format_r32float_no_filter: bool,
    format_r32float_all: bool,
    format_rgba8_srgb_all: bool,
    format_rgba8_srgb_no_write: bool,
    format_rgb10a2_unorm_all: bool,
    format_rgb10a2_unorm_no_write: bool,
    format_rgb10a2_uint_write: bool,
    format_rg11b10_all: bool,
    format_rg11b10_no_write: bool,
    format_rgb9e5_all: bool,
    format_rgb9e5_no_write: bool,
    format_rgb9e5_filter_only: bool,
    format_rg32_color: bool,
    format_rg32_color_write: bool,
    format_rg32float_all: bool,
    format_rg32float_color_blend: bool,
    format_rg32float_no_filter: bool,
    format_rgba32int_color: bool,
    format_rgba32int_color_write: bool,
    format_rgba32float_color: bool,
    format_rgba32float_color_write: bool,
    format_rgba32float_all: bool,
    format_depth16unorm: bool,
    format_depth16unorm_filter: bool,
    format_depth32float_filter: bool,
    format_depth32float_none: bool,
    format_bgr10a2_all: bool,
    format_bgr10a2_no_write: bool,
    // Resource limits.
    max_buffers_per_stage: ResourceIndex,
    max_vertex_buffers: ResourceIndex,
    max_textures_per_stage: ResourceIndex,
    max_samplers_per_stage: ResourceIndex,
    max_binding_array_elements: ResourceIndex,
    max_sampler_binding_array_elements: ResourceIndex,
    buffer_alignment: u64,

    /// Platform-reported maximum buffer size
    ///
    /// This value is clamped to `u32::MAX` for `wgt::Limits`, so you probably
    /// shouldn't be looking at this copy.
    max_buffer_size: u64,
    max_texture_size: u64,
    max_texture_3d_size: u64,
    max_texture_layers: u64,
    max_fragment_input_components: u64,
    max_color_render_targets: u8,
    max_color_attachment_bytes_per_sample: u8,
    max_varying_components: u32,
    max_threads_per_group: u32,
    max_total_threadgroup_memory: u32,
    sample_count_mask: crate::TextureFormatCapabilities,
    // Miscellaneous feature flags.
    supports_debug_markers: bool,
    supports_binary_archives: bool,
    supports_arrays_of_textures: bool,
    supports_arrays_of_textures_write: bool,
    supports_depth_clip_control: bool,
    supports_shader_primitive_index: bool,
    has_unified_memory: Option<bool>,
    timestamp_query_support: TimestampQuerySupport,
    supports_simd_scoped_operations: bool,
    supports_cooperative_matrix: bool,
    int64: bool,
    int64_atomics_min_max: bool,
    int64_atomics: bool,
    float_atomics: bool,
    mesh_shaders: bool,
    max_mesh_task_workgroup_count: u32,
    max_task_payload_size: u32,
    supported_vertex_amplification_factor: u32,
    shader_barycentrics: bool,
    supports_memoryless_storage: bool,
    supports_raytracing: bool,
}
329
/// The subset of [`CapabilitiesQuery`] that the backend keeps around at
/// runtime (produced by `CapabilitiesQuery::private_capabilities`).
#[derive(Debug)]
struct PrivateCapabilities {
    msl_version: MTLLanguageVersion,
    low_power: bool,
    headless: bool,
    has_unified_memory: Option<bool>,
    timestamp_query_support: TimestampQuerySupport,
    supports_memoryless_storage: bool,
    mesh_shaders: bool,
    max_buffers_per_stage: ResourceIndex,
    max_vertex_buffers: ResourceIndex,
    max_textures_per_stage: ResourceIndex,
    max_samplers_per_stage: ResourceIndex,
}

/// Texture-format capability subset kept at runtime (produced by
/// `CapabilitiesQuery::private_texture_format_capabilities`).
#[derive(Debug)]
struct PrivateTextureFormatCapabilities {
    read_write_texture_tier: MTLReadWriteTextureTier,
    sample_count_mask: crate::TextureFormatCapabilities,
    int64_atomics: bool,
    msaa_desktop: bool,
    msaa_apple3: bool,
    msaa_apple7: bool,
    format_r32float_all: bool,
    format_rgba8_srgb_all: bool,
    format_rgb10a2_uint_write: bool,
    format_rgb10a2_unorm_all: bool,
    format_rg11b10_all: bool,
    format_rg32float_all: bool,
    format_rgba32float_all: bool,
    format_depth16unorm: bool,
    format_depth16unorm_filter: bool,
    format_depth32float_filter: bool,
    format_depth24_stencil8: bool,
    format_bc: bool,
    format_eac_etc: bool,
    format_astc: bool,
    format_astc_hdr: bool,
}

/// Workarounds for known per-device driver issues
/// (detected from the device in `PrivateDisabilities::new`).
#[derive(Clone, Debug)]
struct PrivateDisabilities {
    /// Near depth is not respected properly on some Intel GPUs.
    broken_viewport_near_depth: bool,
    /// Multi-target clears don't appear to work properly on Intel GPUs.
    #[allow(dead_code)]
    broken_layered_clear_image: bool,
}

/// Backend tuning knobs.
#[derive(Debug)]
struct Settings {
    // Whether Metal command buffers should retain the resources they
    // reference. NOTE(review): presumably selects between `commandBuffer` and
    // `commandBufferWithUnretainedReferences` at encoder creation — confirm
    // against `command.rs`/`device.rs`.
    retain_command_buffer_references: bool,
}
383
384impl Default for Settings {
385    fn default() -> Self {
386        Self {
387            retain_command_buffer_references: true,
388        }
389    }
390}
391
/// State shared between an [`Adapter`] and every [`Device`] created from it.
struct AdapterShared {
    device: Retained<ProtocolObject<dyn MTLDevice>>,
    disabilities: PrivateDisabilities,
    private_caps: PrivateCapabilities,
    private_texture_format_caps: PrivateTextureFormatCapabilities,
    settings: Settings,
    presentation_timer: time::PresentationTimer,
}

// SAFETY(review): assumed sound on the basis that Apple documents `MTLDevice`
// as thread-safe and the remaining fields are plain data — confirm.
unsafe impl Send for AdapterShared {}
unsafe impl Sync for AdapterShared {}

impl AdapterShared {
    /// Builds the shared adapter state by distilling `capabilities_query`
    /// into the runtime capability structs.
    fn new(
        device: Retained<ProtocolObject<dyn MTLDevice>>,
        capabilities_query: &CapabilitiesQuery,
    ) -> Self {
        let private_caps = capabilities_query.private_capabilities();
        let private_texture_format_caps = capabilities_query.private_texture_format_capabilities();
        log::debug!("{private_caps:#?}");
        log::debug!("{private_texture_format_caps:#?}");

        Self {
            disabilities: PrivateDisabilities::new(&device),
            private_caps,
            private_texture_format_caps,
            device,
            settings: Settings::default(),
            presentation_timer: time::PresentationTimer::new(),
        }
    }

    /// Queries `device` and wraps it in a fully described
    /// [`crate::ExposedAdapter`] for enumeration.
    fn expose(device: Retained<ProtocolObject<dyn MTLDevice>>) -> crate::ExposedAdapter<Api> {
        let name = device.name().to_string();
        let capabilities_query = CapabilitiesQuery::new(&device);
        let shared = AdapterShared::new(device, &capabilities_query);
        let features = capabilities_query.features();
        let capabilities = capabilities_query.capabilities();
        crate::ExposedAdapter {
            info: wgt::AdapterInfo {
                name,
                // NOTE(review): vendor/device ids are left at 0 — Metal does
                // not appear to expose PCI ids through this path.
                vendor: 0,
                device: 0,
                device_type: shared.private_caps.device_type(),
                device_pci_bus_id: String::new(),
                driver: String::new(),
                driver_info: String::new(),
                backend: wgt::Backend::Metal,
                // These are hardcoded based on typical values for Metal devices
                //
                // See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
                // for more information.
                subgroup_min_size: 4,
                subgroup_max_size: 64,
                transient_saves_memory: shared.private_caps.supports_memoryless_storage,
            },
            features,
            capabilities,
            adapter: Adapter::new(Arc::new(shared)),
        }
    }
}

/// A Metal adapter (one physical `MTLDevice`).
pub struct Adapter {
    shared: Arc<AdapterShared>,
}

/// A Metal command queue plus the timestamp period reported to callers.
pub struct Queue {
    shared: Arc<QueueShared>,
    // Returned verbatim by `get_timestamp_period`.
    timestamp_period: f32,
}

// SAFETY(review): assumed sound on the basis that Apple documents
// `MTLCommandQueue` as thread-safe, and the tracked counter is atomic — confirm.
unsafe impl Send for Queue {}
unsafe impl Sync for Queue {}
466
467impl Queue {
468    pub unsafe fn queue_from_raw(
469        raw: Retained<ProtocolObject<dyn MTLCommandQueue>>,
470        timestamp_period: f32,
471    ) -> Self {
472        Self {
473            shared: Arc::new(QueueShared {
474                raw,
475                command_buffer_created_not_submitted: atomic::AtomicUsize::new(0),
476            }),
477            timestamp_period,
478        }
479    }
480}
481
/// Queue state shared with command encoders.
#[derive(Debug)]
pub struct QueueShared {
    raw: Retained<ProtocolObject<dyn MTLCommandQueue>>,
    // Tracks command buffers created via `CommandEncoder::begin_encoding` that
    // have not yet been submitted or discarded. Used to proactively fail
    // before hitting Metal's `maxCommandBufferCount`.
    //
    // (In a few places we call `.commandBuffer{,WithUnretainedReferences}` directly
    // to create command buffers for internal purposes. In those cases we always
    // commit the buffer immediately, so we don't adjust the counter for them.)
    command_buffer_created_not_submitted: atomic::AtomicUsize,
}

/// A logical Metal device, sharing the adapter's state.
pub struct Device {
    shared: Arc<AdapterShared>,
    features: wgt::Features,
    counters: Arc<wgt::HalCounters>,
}

/// A presentable surface backed by a `CAMetalLayer`.
pub struct Surface {
    render_layer: Mutex<Retained<CAMetalLayer>>,
    swapchain_format: RwLock<Option<wgt::TextureFormat>>,
    extent: RwLock<wgt::Extent3d>,
}

// SAFETY(review): assumed sound because all mutable state is behind
// `Mutex`/`RwLock`; the layer pointer itself is only accessed through the
// lock — confirm.
unsafe impl Send for Surface {}
unsafe impl Sync for Surface {}

/// A texture acquired from a [`Surface`], paired with its drawable.
#[derive(Debug)]
pub struct SurfaceTexture {
    texture: Texture,
    drawable: Retained<ProtocolObject<dyn MTLDrawable>>,
    /// When set, presentation happens via `waitUntilScheduled` plus an
    /// explicit `drawable.present()` instead of `presentDrawable`
    /// (see [`crate::Queue::present`]).
    /// Useful for UI-intensive applications that are sensitive to
    /// window resizing.
    present_with_transaction: bool,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allows the generic HAL layer to treat a surface texture as a plain texture.
impl core::borrow::Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl core::borrow::Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

// SAFETY(review): assumed sound on the basis that the contained Metal objects
// are reference-counted Objective-C objects — confirm.
unsafe impl Send for SurfaceTexture {}
unsafe impl Sync for SurfaceTexture {}
535
impl crate::Queue for Queue {
    type A = Api;

    /// Commits `command_buffers` to the Metal queue and arranges for
    /// `signal_fence` to reach `signal_value` once the last of them completes.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        _surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        autoreleasepool(|_| {
            // Install a completion handler that publishes `signal_value` to
            // the fence. It goes on the *last* submitted command buffer, or
            // on a freshly created buffer when the submission is empty.
            let extra_command_buffer = {
                let completed_value = Arc::clone(&signal_fence.completed_value);
                let block = block2::RcBlock::new(move |_cmd_buf| {
                    // Publish the new fence value once the GPU reports completion.
                    completed_value.store(signal_value, atomic::Ordering::Release);
                });

                let raw = match command_buffers.last() {
                    Some(&cmd_buf) => cmd_buf.raw.clone(),
                    None => {
                        // We do not bother adjusting `command_buffer_created_not_submitted`
                        // because we immediately commit this buffer.
                        self.shared
                            .raw
                            .commandBufferWithUnretainedReferences()
                            .unwrap()
                    }
                };
                raw.setLabel(Some(ns_string!("(wgpu internal) Signal")));
                unsafe { raw.addCompletedHandler(block2::RcBlock::as_ptr(&block)) };

                // Drop already-completed entries, then record this pending signal.
                signal_fence.maintain();
                signal_fence
                    .pending_command_buffers
                    .push((signal_value, raw.clone()));

                if let Some(shared_event) = &signal_fence.shared_event {
                    raw.encodeSignalEvent_value(shared_event.as_ref(), signal_value);
                }
                // Only hand back a buffer for committing below when we created
                // it ourselves; caller-provided buffers are committed in the
                // loop that follows.
                match command_buffers.last() {
                    Some(_) => None,
                    None => Some(raw),
                }
            };

            for cmd_buffer in command_buffers {
                cmd_buffer.raw.commit();
                // One command buffer per `end_encoding` call moves from the
                // "created but not yet submitted" bucket into the submitted
                // set, so update the counter.
                let previous = self
                    .shared
                    .command_buffer_created_not_submitted
                    .fetch_sub(1, atomic::Ordering::AcqRel);
                debug_assert!(previous > 0);
            }

            if let Some(raw) = extra_command_buffer {
                raw.commit();
            }
        });
        Ok(())
    }
    /// Presents an acquired surface texture, honoring
    /// `present_with_transaction` (see [`SurfaceTexture`]).
    unsafe fn present(
        &self,
        _surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        autoreleasepool(|_| {
            // We do not bother adjusting `command_buffer_created_not_submitted`
            // because we immediately commit this buffer.
            let command_buffer = self.shared.raw.commandBuffer().unwrap();
            command_buffer.setLabel(Some(ns_string!("(wgpu internal) Present")));

            // https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc
            if !texture.present_with_transaction {
                command_buffer.presentDrawable(&texture.drawable);
            }

            command_buffer.commit();

            if texture.present_with_transaction {
                // Wait for scheduling, then present the drawable directly so
                // presentation stays in step with the Core Animation transaction.
                command_buffer.waitUntilScheduled();
                texture.drawable.present();
            }
        });
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.timestamp_period
    }
}
629
/// A Metal buffer and its length.
#[derive(Debug)]
pub struct Buffer {
    raw: Retained<ProtocolObject<dyn MTLBuffer>>,
    /// Length of the buffer in bytes; used to resolve bindings whose
    /// `size` is `None` (see `BufferBinding::resolve_size`).
    size: wgt::BufferAddress,
}

// SAFETY(review): assumed sound on the basis that `MTLBuffer` is a
// reference-counted Objective-C object — confirm.
unsafe impl Send for Buffer {}
unsafe impl Sync for Buffer {}

impl crate::DynBuffer for Buffer {}

impl Buffer {
    /// Returns the underlying `MTLBuffer` pointer for FFI argument tables.
    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLBuffer>> {
        // SAFETY: `Retained::as_ptr` returns the pointer of a live
        // Objective-C object, which is never null.
        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
    }
}
646
647impl crate::BufferBinding<'_, Buffer> {
648    fn resolve_size(&self) -> wgt::BufferAddress {
649        match self.size {
650            Some(size) => size.get(),
651            None => self.buffer.size - self.offset,
652        }
653    }
654}
655
/// A Metal texture plus the metadata needed for views and copies.
#[derive(Debug)]
pub struct Texture {
    raw: Retained<ProtocolObject<dyn MTLTexture>>,
    format: wgt::TextureFormat,
    raw_type: MTLTextureType,
    array_layers: u32,
    mip_levels: u32,
    copy_size: crate::CopyExtent,
}

impl Texture {
    /// Exposes the underlying `MTLTexture` for interop.
    pub fn raw_handle(&self) -> &ProtocolObject<dyn MTLTexture> {
        &self.raw
    }
}

impl crate::DynTexture for Texture {}

// SAFETY(review): assumed sound on the basis that `MTLTexture` is a
// reference-counted Objective-C object — confirm.
unsafe impl Send for Texture {}
unsafe impl Sync for Texture {}

/// A view over a [`Texture`] and the format aspects it covers.
#[derive(Debug)]
pub struct TextureView {
    raw: Retained<ProtocolObject<dyn MTLTexture>>,
    aspects: crate::FormatAspects,
}

impl crate::DynTextureView for TextureView {}

unsafe impl Send for TextureView {}
unsafe impl Sync for TextureView {}

impl TextureView {
    /// Returns the underlying `MTLTexture` pointer for FFI argument tables.
    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLTexture>> {
        // SAFETY: `Retained::as_ptr` returns the pointer of a live
        // Objective-C object, which is never null.
        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
    }
}

/// A Metal sampler state object.
#[derive(Debug)]
pub struct Sampler {
    raw: Retained<ProtocolObject<dyn MTLSamplerState>>,
}

impl crate::DynSampler for Sampler {}

unsafe impl Send for Sampler {}
unsafe impl Sync for Sampler {}

impl Sampler {
    /// Returns the underlying `MTLSamplerState` pointer for FFI argument tables.
    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLSamplerState>> {
        // SAFETY: `Retained::as_ptr` returns the pointer of a live
        // Objective-C object, which is never null.
        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
    }
}

#[derive(Debug)]
pub struct BindGroupLayout {
    /// Sorted list of BGL entries.
    entries: Arc<[wgt::BindGroupLayoutEntry]>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

/// One value of `T` per Metal argument-table kind.
#[derive(Clone, Debug, Default)]
struct ResourceData<T> {
    buffers: T,
    textures: T,
    samplers: T,
}

/// One value of `T` per supported shader stage (see [`NAGA_STAGES`] for the
/// stage each field corresponds to).
#[derive(Clone, Debug, Default)]
struct MultiStageData<T> {
    vs: T,
    fs: T,
    cs: T,
    ts: T,
    ms: T,
}

/// Maps each `MultiStageData` field to its naga shader stage.
const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData {
    vs: naga::ShaderStage::Vertex,
    fs: naga::ShaderStage::Fragment,
    cs: naga::ShaderStage::Compute,
    ts: naga::ShaderStage::Task,
    ms: naga::ShaderStage::Mesh,
};

impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> {
    type Output = T;
    fn index(&self, stage: naga::ShaderStage) -> &T {
        match stage {
            naga::ShaderStage::Vertex => &self.vs,
            naga::ShaderStage::Fragment => &self.fs,
            naga::ShaderStage::Compute => &self.cs,
            naga::ShaderStage::Task => &self.ts,
            naga::ShaderStage::Mesh => &self.ms,
            // Ray-tracing stages have no per-stage data in this backend.
            naga::ShaderStage::RayGeneration
            | naga::ShaderStage::AnyHit
            | naga::ShaderStage::ClosestHit
            | naga::ShaderStage::Miss => unimplemented!(),
        }
    }
}
758
759impl<T> MultiStageData<T> {
760    fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> {
761        MultiStageData {
762            vs: fun(&self.vs),
763            fs: fun(&self.fs),
764            cs: fun(&self.cs),
765            ts: fun(&self.ts),
766            ms: fun(&self.ms),
767        }
768    }
769    fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> {
770        MultiStageData {
771            vs: fun(self.vs),
772            fs: fun(self.fs),
773            cs: fun(self.cs),
774            ts: fun(self.ts),
775            ms: fun(self.ms),
776        }
777    }
778    fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> {
779        iter::once(&self.vs)
780            .chain(iter::once(&self.fs))
781            .chain(iter::once(&self.cs))
782            .chain(iter::once(&self.ts))
783            .chain(iter::once(&self.ms))
784    }
785    fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> {
786        iter::once(&mut self.vs)
787            .chain(iter::once(&mut self.fs))
788            .chain(iter::once(&mut self.cs))
789            .chain(iter::once(&mut self.ts))
790            .chain(iter::once(&mut self.ms))
791    }
792}
793
/// Per-stage counts of occupied buffer/texture/sampler argument-table slots.
type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>;
/// Per-stage naga MSL entry-point resource maps.
type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>;

#[derive(Debug)]
struct BindGroupLayoutInfo {
    /// Argument-table indices at which this bind group's resources begin,
    /// per stage and per resource kind.
    base_resource_indices: MultiStageResourceCounters,
}

/// Where a stage's immediate data lives in the buffer argument table.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct ImmediateDataInfo {
    // NOTE(review): presumably the number of immediate words — confirm
    // against the code that fills this in.
    count: u32,
    buffer_index: ResourceIndex,
}

/// The resolved Metal binding model for a pipeline layout; see the module
/// docs for how immediates, bind groups, and vertex buffers are laid out.
#[derive(Debug)]
pub struct PipelineLayout {
    bind_group_infos: [Option<BindGroupLayoutInfo>; crate::MAX_BIND_GROUPS],
    immediates_infos: MultiStageData<Option<ImmediateDataInfo>>,
    total_counters: MultiStageResourceCounters,
    total_immediates: u32,
    per_stage_map: MultiStageResources,
}

impl crate::DynPipelineLayout for PipelineLayout {}

/// A resource bound through a buffer argument-table slot: either a plain
/// buffer or an acceleration structure.
#[derive(Debug)]
enum BufferLikeResource {
    Buffer {
        ptr: NonNull<ProtocolObject<dyn MTLBuffer>>,
        offset: wgt::BufferAddress,
        dynamic_index: Option<u32>,

        /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`.
        ///
        /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can
        /// hold WGSL runtime-sized arrays. When one does, we must pass its size to
        /// shader entry points to implement bounds checks and WGSL's `arrayLength`
        /// function. See `device::CompiledShader::sized_bindings` for details.
        ///
        /// [`Storage`]: wgt::BufferBindingType::Storage
        binding_size: Option<wgt::BufferSize>,

        binding_location: u32,
    },
    AccelerationStructure(NonNull<ProtocolObject<dyn MTLAccelerationStructure>>),
}

/// Accumulated usage information for one Metal resource: how it is used,
/// which render stages touch it, and whether compute sees it.
#[derive(Debug)]
struct UseResourceInfo {
    uses: MTLResourceUsage,
    stages: MTLRenderStages,
    visible_in_compute: bool,
}

impl Default for UseResourceInfo {
    // All-empty: no usage bits, no render stages, not visible in compute.
    fn default() -> Self {
        Self {
            uses: MTLResourceUsage::empty(),
            stages: MTLRenderStages::empty(),
            visible_in_compute: false,
        }
    }
}
857
/// A bind group resolved into raw Metal resource pointers, ready to be
/// written into argument tables.
#[derive(Debug, Default)]
pub struct BindGroup {
    counters: MultiStageResourceCounters,
    buffers: Vec<BufferLikeResource>,
    samplers: Vec<NonNull<ProtocolObject<dyn MTLSamplerState>>>,
    textures: Vec<NonNull<ProtocolObject<dyn MTLTexture>>>,

    argument_buffers: Vec<Retained<ProtocolObject<dyn MTLBuffer>>>,
    // Resources that must be made resident (`useResource`) along with how
    // they are used; see `UseResourceInfo`.
    resources_to_use: HashMap<NonNull<ProtocolObject<dyn MTLResource>>, UseResourceInfo>,
}

impl crate::DynBindGroup for BindGroup {}

// SAFETY(review): assumed sound on the basis that the contained raw pointers
// refer to reference-counted Objective-C objects kept alive by the retained
// fields — confirm.
unsafe impl Send for BindGroup {}
unsafe impl Sync for BindGroup {}

/// The origin of a shader module: naga IR or a precompiled Metal library.
#[derive(Debug)]
pub enum ShaderModuleSource {
    Naga(crate::NagaShader),
    Passthrough(PassthroughShader),
}

/// A precompiled `MTLLibrary` passed through without naga translation.
#[derive(Debug)]
pub struct PassthroughShader {
    pub library: Retained<ProtocolObject<dyn MTLLibrary>>,
    // NOTE(review): presumably the fixed workgroup size of the contained
    // compute entry point — confirm against `device.rs`.
    pub num_workgroups: (u32, u32, u32),
}

unsafe impl Send for PassthroughShader {}
unsafe impl Sync for PassthroughShader {}

/// A shader module plus the runtime checks requested for it.
#[derive(Debug)]
pub struct ShaderModule {
    source: ShaderModuleSource,
    bounds_checks: wgt::ShaderRuntimeChecks,
}

impl crate::DynShaderModule for ShaderModule {}

/// Per-stage metadata captured when a pipeline is compiled.
#[derive(Debug)]
struct PipelineStageInfo {
    #[allow(dead_code)]
    library: Option<Retained<ProtocolObject<dyn MTLLibrary>>>,
    immediates: Option<ImmediateDataInfo>,

    /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes.
    ///
    /// See `device::CompiledShader::sized_bindings` for more details.
    sizes_slot: Option<naga::back::msl::Slot>,

    /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays.
    ///
    /// See `device::CompiledShader::sized_bindings` for more details.
    sized_bindings: Vec<naga::ResourceBinding>,

    /// Info on all bound vertex buffers.
    vertex_buffer_mappings: Vec<naga::back::msl::VertexBufferMapping>,

    /// The workgroup size for compute, task or mesh stages
    raw_wg_size: MTLSize,

    /// The workgroup memory sizes for compute task or mesh stages
    work_group_memory_sizes: Vec<u32>,
}

// TODO(madsmtm): Derive this when a release with
// https://github.com/madsmtm/objc2/issues/804 is available (likely 0.4).
impl Default for PipelineStageInfo {
    fn default() -> Self {
        Self {
            library: Default::default(),
            immediates: Default::default(),
            sizes_slot: Default::default(),
            sized_bindings: Default::default(),
            vertex_buffer_mappings: Default::default(),
            // `MTLSize` has no `Default` impl, hence the manual zero value.
            raw_wg_size: MTLSize {
                width: 0,
                height: 0,
                depth: 0,
            },
            work_group_memory_sizes: Default::default(),
        }
    }
}
942
943impl PipelineStageInfo {
944    fn clear(&mut self) {
945        self.immediates = None;
946        self.sizes_slot = None;
947        self.sized_bindings.clear();
948        self.vertex_buffer_mappings.clear();
949        self.library = None;
950        self.work_group_memory_sizes.clear();
951        self.raw_wg_size = MTLSize {
952            width: 0,
953            height: 0,
954            depth: 0,
955        };
956    }
957
958    fn assign_from(&mut self, other: &Self) {
959        self.immediates = other.immediates;
960        self.sizes_slot = other.sizes_slot;
961        self.sized_bindings.clear();
962        self.sized_bindings.extend_from_slice(&other.sized_bindings);
963        self.vertex_buffer_mappings.clear();
964        self.vertex_buffer_mappings
965            .extend_from_slice(&other.vertex_buffer_mappings);
966        self.library = Some(other.library.as_ref().unwrap().clone());
967        self.raw_wg_size = other.raw_wg_size;
968        self.work_group_memory_sizes.clear();
969        self.work_group_memory_sizes
970            .extend_from_slice(&other.work_group_memory_sizes);
971    }
972}
973
/// A compiled render pipeline: the native pipeline state object plus the
/// fixed-function and per-stage information needed when binding it.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: Retained<ProtocolObject<dyn MTLRenderPipelineState>>,
    // Per-stage binding info; `None` when the stage is absent from the
    // pipeline (e.g. no fragment stage, or vertex vs. mesh pipelines).
    vs_info: Option<PipelineStageInfo>,
    fs_info: Option<PipelineStageInfo>,
    ts_info: Option<PipelineStageInfo>,
    ms_info: Option<PipelineStageInfo>,
    // Fixed-function state captured at creation, applied on the render
    // encoder when the pipeline is bound.
    raw_primitive_type: MTLPrimitiveType,
    raw_triangle_fill_mode: MTLTriangleFillMode,
    raw_front_winding: MTLWinding,
    raw_cull_mode: MTLCullMode,
    raw_depth_clip_mode: Option<MTLDepthClipMode>,
    // Depth-stencil state object together with the depth-bias values to set
    // alongside it; `None` when the pipeline has no depth/stencil.
    depth_stencil: Option<(
        Retained<ProtocolObject<dyn MTLDepthStencilState>>,
        wgt::DepthBiasState,
    )>,
}

// SAFETY(review): asserts cross-thread usability of the contained Metal
// objects per Apple's Metal threading guarantees — confirm.
unsafe impl Send for RenderPipeline {}
unsafe impl Sync for RenderPipeline {}

impl crate::DynRenderPipeline for RenderPipeline {}
996
/// A compiled compute pipeline: the native pipeline state object plus the
/// compute stage's binding information.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: Retained<ProtocolObject<dyn MTLComputePipelineState>>,
    cs_info: PipelineStageInfo,
}

// SAFETY(review): asserts cross-thread usability of the contained Metal
// objects per Apple's Metal threading guarantees — confirm.
unsafe impl Send for ComputePipeline {}
unsafe impl Sync for ComputePipeline {}

impl crate::DynComputePipeline for ComputePipeline {}
1007
/// A set of queries, backed by a plain buffer that holds resolved results.
#[derive(Debug, Clone)]
pub struct QuerySet {
    raw_buffer: Retained<ProtocolObject<dyn MTLBuffer>>,
    // Metal records counter samples (e.g. timestamps) in a dedicated
    // sample-buffer object, separate from the result buffer above.
    counter_sample_buffer: Option<Retained<ProtocolObject<dyn MTLCounterSampleBuffer>>>,
    ty: wgt::QueryType,
}

impl crate::DynQuerySet for QuerySet {}

// SAFETY(review): asserts cross-thread usability of the contained Metal
// objects per Apple's Metal threading guarantees — confirm.
unsafe impl Send for QuerySet {}
unsafe impl Sync for QuerySet {}
1020
/// A timeline fence, tracked by watching command-buffer completion.
#[derive(Debug)]
pub struct Fence {
    // Highest value known to have completed; updated atomically so it can be
    // read without exclusive access (see `get_latest`).
    completed_value: Arc<atomic::AtomicU64>,
    /// The pending fence values have to be ascending.
    pending_command_buffers: Vec<(
        crate::FenceValue,
        Retained<ProtocolObject<dyn MTLCommandBuffer>>,
    )>,
    // Optional shared event exposed to external users (see
    // `raw_shared_event`); presence depends on device/feature support.
    shared_event: Option<Retained<ProtocolObject<dyn MTLSharedEvent>>>,
}

impl crate::DynFence for Fence {}

// SAFETY(review): asserts cross-thread usability of the contained Metal
// objects per Apple's Metal threading guarantees — confirm.
unsafe impl Send for Fence {}
unsafe impl Sync for Fence {}
1036
1037impl Fence {
1038    fn get_latest(&self) -> crate::FenceValue {
1039        let mut max_value = self.completed_value.load(atomic::Ordering::Acquire);
1040        for &(value, ref cmd_buf) in self.pending_command_buffers.iter() {
1041            if cmd_buf.status() == MTLCommandBufferStatus::Completed {
1042                max_value = value;
1043            }
1044        }
1045        max_value
1046    }
1047
1048    fn maintain(&mut self) {
1049        let latest = self.get_latest();
1050        self.pending_command_buffers
1051            .retain(|&(value, _)| value > latest);
1052    }
1053
1054    pub fn raw_shared_event(&self) -> Option<&ProtocolObject<dyn MTLSharedEvent>> {
1055        self.shared_event.as_deref()
1056    }
1057}
1058
/// Currently bound index buffer, recorded at bind time and consumed by
/// indexed draw calls.
struct IndexState {
    buffer_ptr: NonNull<ProtocolObject<dyn MTLBuffer>>,
    offset: wgt::BufferAddress,
    // Size in bytes of one index (derived from `raw_type`).
    stride: wgt::BufferAddress,
    raw_type: MTLIndexType,
}

/// Scratch storage reused across commands to avoid per-command allocation.
#[derive(Default)]
struct Temp {
    binding_sizes: Vec<u32>,
}
1070
/// Mutable state carried by a `CommandEncoder` while recording.
struct CommandState {
    // At most one of the encoders below is active at a time; each is `Some`
    // only while the corresponding pass/usage is open.
    blit: Option<Retained<ProtocolObject<dyn MTLBlitCommandEncoder>>>,
    acceleration_structure_builder:
        Option<Retained<ProtocolObject<dyn MTLAccelerationStructureCommandEncoder>>>,
    render: Option<Retained<ProtocolObject<dyn MTLRenderCommandEncoder>>>,
    compute: Option<Retained<ProtocolObject<dyn MTLComputeCommandEncoder>>>,
    // Primitive type of the currently bound render pipeline, needed when
    // issuing draws.
    raw_primitive_type: MTLPrimitiveType,
    // Currently bound index buffer, if any.
    index: Option<IndexState>,
    // Per-shader-stage binding info copied from the bound pipeline(s).
    stage_infos: MultiStageData<PipelineStageInfo>,

    /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers.
    ///
    /// Specifically:
    ///
    /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group`
    ///   and `@binding` attributes) for `var<storage>` global variables in the
    ///   current module that contain runtime-sized arrays.
    ///
    /// - The values are the actual sizes of the buffers currently bound to
    ///   provide those globals' contents, which are needed to implement bounds
    ///   checks and the WGSL `arrayLength` function.
    ///
    /// For each stage `S` in `stage_infos`, we consult this to find the sizes
    /// of the buffers listed in `stage_infos.S.sized_bindings`, which we must
    /// pass to the entry point.
    ///
    /// See `device::CompiledShader::sized_bindings` for more details.
    ///
    /// [`ResourceBinding`]: naga::ResourceBinding
    storage_buffer_length_map: FastHashMap<naga::ResourceBinding, wgt::BufferSize>,

    // Sizes of currently bound vertex buffers. NOTE(review): the `u64` key's
    // meaning (slot index vs. packed id) isn't visible here — confirm against
    // the code that populates it.
    vertex_buffer_size_map: FastHashMap<u64, wgt::BufferSize>,

    // Current immediate-data values; per the module docs these occupy the
    // first slots of the native buffer table.
    immediates: Vec<u32>,

    /// Timer query that should be executed when the next pass starts.
    pending_timer_queries: Vec<(QuerySet, u32)>,
}
1109
/// Records commands into a Metal command buffer.
pub struct CommandEncoder {
    shared: Arc<AdapterShared>,
    queue_shared: Arc<QueueShared>,
    // `Some` between begin/end of encoding; the buffer being recorded into.
    raw_cmd_buf: Option<Retained<ProtocolObject<dyn MTLCommandBuffer>>>,
    state: CommandState,
    // Reusable scratch storage (see `Temp`).
    temp: Temp,
    counters: Arc<wgt::HalCounters>,
}
1118
1119impl fmt::Debug for CommandEncoder {
1120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1121        f.debug_struct("CommandEncoder")
1122            .field("raw_cmd_buf", &self.raw_cmd_buf)
1123            .finish()
1124    }
1125}
1126
1127unsafe impl Send for CommandEncoder {}
1128unsafe impl Sync for CommandEncoder {}
1129
/// A finished command buffer, ready for queue submission.
#[derive(Debug)]
pub struct CommandBuffer {
    raw: Retained<ProtocolObject<dyn MTLCommandBuffer>>,
    queue_shared: Arc<QueueShared>,
}

impl crate::DynCommandBuffer for CommandBuffer {}

// SAFETY(review): asserts cross-thread usability of the contained Metal
// objects per Apple's Metal threading guarantees — confirm.
unsafe impl Send for CommandBuffer {}
unsafe impl Sync for CommandBuffer {}
1140
/// Pipeline caching is a no-op on Metal; this type only satisfies the trait.
#[derive(Debug)]
pub struct PipelineCache;

impl crate::DynPipelineCache for PipelineCache {}
1145
/// A ray-tracing acceleration structure, wrapping the native Metal object.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: Retained<ProtocolObject<dyn MTLAccelerationStructure>>,
}

impl AccelerationStructure {
    /// Raw non-null pointer to the underlying Metal object (for FFI use;
    /// does not transfer ownership).
    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLAccelerationStructure>> {
        // SAFETY: `Retained` always holds a valid, non-null object pointer,
        // so `Retained::as_ptr` never returns null.
        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
    }
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
// SAFETY(review): asserts cross-thread usability of the contained Metal
// object per Apple's Metal threading guarantees — confirm.
unsafe impl Send for AccelerationStructure {}
unsafe impl Sync for AccelerationStructure {}
1160
/// The Apple operating system the backend is running on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OsType {
    Macos,
    Ios,
    Tvos,
    VisionOs,
}