Skip to main content

wgpu_hal/metal/
mod.rs

1/*!
2# Metal API internals.
3
4## Pipeline Layout
5
6In Metal, immediates, vertex buffers, and resources in the bind groups
7are all placed together in the native resource bindings, which work similarly to D3D11:
8there are tables of textures, buffers, and samplers.
9
10We put immediates first (if any) in the table, followed by bind group 0
11resources, followed by other bind groups. The vertex buffers are bound at the very
12end of the VS buffer table.
13
14!*/
15
16#[allow(
17    deprecated,
18    reason = "MTLFeatureSet` is superseded by `MTLGpuFamily`.
19        However, `MTLGpuFamily` is only supported starting MacOS 10.15, whereas our minimum target is MacOS 10.13,
20        See https://github.com/gpuweb/gpuweb/issues/1069 for minimum spec.
21        TODO: Eventually all deprecated features should be abstracted and use new api when available."
22)]
23mod adapter;
24mod command;
25mod conv;
26mod device;
27mod library_from_metallib;
28mod surface;
29mod time;
30
31use alloc::{
32    string::{String, ToString as _},
33    sync::Arc,
34    vec::Vec,
35};
36use core::{fmt, iter, ops, ptr::NonNull, sync::atomic};
37use std::sync::OnceLock;
38
39use bitflags::bitflags;
40use hashbrown::HashMap;
41use naga::FastHashMap;
42use objc2::{
43    available,
44    rc::{autoreleasepool, Retained},
45    runtime::ProtocolObject,
46};
47use objc2_foundation::ns_string;
48use objc2_metal::{
49    MTLAccelerationStructure, MTLAccelerationStructureCommandEncoder, MTLArgumentBuffersTier,
50    MTLBlitCommandEncoder, MTLBuffer, MTLCommandBuffer, MTLCommandBufferStatus, MTLCommandQueue,
51    MTLComputeCommandEncoder, MTLComputePipelineState, MTLCounterSampleBuffer, MTLCullMode,
52    MTLDepthClipMode, MTLDepthStencilState, MTLDevice, MTLDrawable, MTLIndexType,
53    MTLLanguageVersion, MTLLibrary, MTLPrimitiveType, MTLReadWriteTextureTier,
54    MTLRenderCommandEncoder, MTLRenderPipelineState, MTLRenderStages, MTLResource,
55    MTLResourceUsage, MTLSamplerState, MTLSharedEvent, MTLSize, MTLTexture, MTLTextureType,
56    MTLTriangleFillMode, MTLWinding,
57};
58use objc2_quartz_core::CAMetalLayer;
59use parking_lot::{Condvar, Mutex, RwLock};
60
/// Marker type for the Metal backend.
///
/// It carries no data; the backend's concrete types are attached to it through its
/// [`crate::Api`] implementation.
#[derive(Clone, Debug)]
pub struct Api;
63
// Integer type used for resource counts/limits such as `max_textures_per_stage` and
// `max_binding_array_elements` in `CapabilitiesQuery`.
// NOTE(review): presumably also used for binding-table slot indices elsewhere — confirm.
type ResourceIndex = u32;
65
/// Binds every associated type of the backend-agnostic [`crate::Api`] trait to the
/// Metal implementation of the same name.
impl crate::Api for Api {
    // Identifies this backend as Metal.
    const VARIANT: wgt::Backend = wgt::Backend::Metal;

    // Instance-level objects.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Command submission.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;

    // Binding model, shaders and pipelines.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type RayTracingPipeline = RayTracingPipeline;
    type ComputePipeline = ComputePipeline;
    type PipelineCache = PipelineCache;

    // Ray tracing.
    type AccelerationStructure = AccelerationStructure;
}
97
// Invokes the crate-level `impl_dyn_resource!` macro for every Metal backend type.
// NOTE(review): presumably this implements the dynamic-dispatch resource trait for each
// listed type; the macro body is not visible here — confirm against its definition.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    RayTracingPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
123
/// Provides availability information about Mac APIs.
///
/// This may include Metal features that depend only on software support.
/// Features with varying hardware support are in [`CapabilitiesQuery`].
///
/// When feature detection is only needed once, it may also be done inline.
struct OsFeatures;

impl OsFeatures {
    /// Returns `true` when the OS is macOS 10.13 or newer according to `available!`, or
    /// when compiling for the `macabi` target ABI (Mac Catalyst).
    ///
    /// The linked Apple page documents `CAMetalLayer.displaySyncEnabled`, which is the
    /// API this check is guarding.
    fn display_sync() -> bool {
        // https://developer.apple.com/documentation/quartzcore/cametallayer/displaysyncenabled
        available!(macos = 10.13) || cfg!(target_abi = "macabi")
    }
}
138
/// Metal backend instance.
///
/// It only stores the instance flags it was created with; they are forwarded to
/// `AdapterShared::expose` when adapters are enumerated.
#[derive(Debug)]
pub struct Instance {
    // Copied from the `InstanceDescriptor` in `init`.
    flags: wgt::InstanceFlags,
}

impl Instance {
    /// Creates a [`Surface`] from an existing `CAMetalLayer` by delegating to
    /// `Surface::from_layer`.
    pub fn create_surface_from_layer(&self, layer: &CAMetalLayer) -> Surface {
        Surface::from_layer(layer)
    }
}
149
150impl crate::Instance for Instance {
151    type A = Api;
152
153    unsafe fn init(desc: &crate::InstanceDescriptor<'_>) -> Result<Self, crate::InstanceError> {
154        profiling::scope!("Init Metal Backend");
155        // We do not enable metal validation based on the validation flags as it affects the entire
156        // process. Instead, we enable the validation inside the test harness itself in tests/src/native.rs.
157        Ok(Instance { flags: desc.flags })
158    }
159
160    unsafe fn create_surface(
161        &self,
162        display_handle: raw_window_handle::RawDisplayHandle,
163        window_handle: raw_window_handle::RawWindowHandle,
164    ) -> Result<Surface, crate::InstanceError> {
165        let layer = match (display_handle, window_handle) {
166            (
167                raw_window_handle::RawDisplayHandle::AppKit(_),
168                raw_window_handle::RawWindowHandle::AppKit(handle),
169            ) => unsafe { raw_window_metal::Layer::from_ns_view(handle.ns_view) },
170            (
171                raw_window_handle::RawDisplayHandle::UiKit(_),
172                raw_window_handle::RawWindowHandle::UiKit(handle),
173            ) => unsafe { raw_window_metal::Layer::from_ui_view(handle.ui_view) },
174            _ => {
175                return Err(crate::InstanceError::new(format!(
176                    "window handle {window_handle:?} is not a Metal-compatible handle"
177                )))
178            }
179        };
180
181        // SAFETY: The layer is an initialized instance of `CAMetalLayer`, and
182        // we transfer the retain count to `Retained` using `into_raw`.
183        let layer = unsafe {
184            Retained::from_raw(layer.into_raw().cast::<CAMetalLayer>().as_ptr()).unwrap()
185        };
186
187        Ok(Surface::new(layer))
188    }
189
190    unsafe fn enumerate_adapters(
191        &self,
192        _surface_hint: Option<&Surface>,
193    ) -> Vec<crate::ExposedAdapter<Api>> {
194        let devices = objc2_metal::MTLCopyAllDevices();
195        let instance_flags = self.flags;
196        let mut adapters: Vec<crate::ExposedAdapter<Api>> = devices
197            .into_iter()
198            .map(|d| AdapterShared::expose(d, instance_flags))
199            .collect();
200        adapters.sort_by_key(|ad| {
201            (
202                ad.adapter.shared.private_caps.low_power,
203                ad.adapter.shared.private_caps.headless,
204            )
205        });
206        adapters
207    }
208}
209
bitflags!(
    /// Similar to `MTLCounterSamplingPoint`, but abstracted at a slightly higher level
    /// for our purposes.
    ///
    /// Every `ON_*_ENCODER` flag also contains the `STAGE_BOUNDARIES` bit, so testing for
    /// an encoder flag implies stage-boundary support.
    #[derive(Debug, Copy, Clone)]
    pub struct TimestampQuerySupport: u32 {
        /// On creating Metal encoders.
        const STAGE_BOUNDARIES = 1 << 1;
        /// Within existing draw encoders.
        const ON_RENDER_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 2);
        /// Within existing dispatch encoders.
        const ON_COMPUTE_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 3);
        /// Within existing blit encoders.
        const ON_BLIT_ENCODER = Self::STAGE_BOUNDARIES.bits() | (1 << 4);

        /// Within any wgpu render/compute pass.
        ///
        /// This is the union of `ON_RENDER_ENCODER` and `ON_COMPUTE_ENCODER`.
        const INSIDE_WGPU_PASSES = Self::ON_RENDER_ENCODER.bits() | Self::ON_COMPUTE_ENCODER.bits();
    }
);
227
/// Snapshot of everything queried from a Metal device.
///
/// It is built with `CapabilitiesQuery::new(&device)` and then reduced into
/// [`PrivateCapabilities`], [`PrivateTextureFormatCapabilities`], the exposed feature set
/// and the exposed capabilities (see `AdapterShared::new` / `AdapterShared::expose`).
// NOTE(review): `dead_code` is allowed presumably because not every queried field is
// consumed on every target — confirm.
#[allow(dead_code)]
struct CapabilitiesQuery {
    // --- Shading language and general device features ---
    msl_version: MTLLanguageVersion,
    fragment_rw_storage: bool,
    read_write_texture_tier: MTLReadWriteTextureTier,
    msaa_desktop: bool,
    msaa_apple3: bool,
    msaa_apple7: bool,
    resource_heaps: bool,
    argument_buffers: Option<MTLArgumentBuffersTier>,
    mutable_comparison_samplers: bool,
    sampler_clamp_to_border: bool,
    indirect_draw_dispatch: bool,
    base_vertex_first_instance_drawing: bool,
    dual_source_blending: bool,
    low_power: bool,
    headless: bool,
    layered_rendering: bool,
    function_specialization: bool,
    depth_clip_mode: bool,
    texture_cube_array: bool,
    supports_float_filtering: bool,
    // --- Per-format support flags ---
    format_depth24_stencil8: bool,
    format_depth32_stencil8_filter: bool,
    format_depth32_stencil8_none: bool,
    format_min_srgb_channels: u8,
    format_b5: bool,
    format_bc: bool,
    format_eac_etc: bool,
    format_astc: bool,
    format_astc_hdr: bool,
    format_astc_3d: bool,
    format_any8_unorm_srgb_all: bool,
    format_any8_unorm_srgb_no_write: bool,
    format_any8_snorm_all: bool,
    format_r16_norm_all: bool,
    format_r32_all: bool,
    format_r32_no_write: bool,
    format_r32float_no_write_no_filter: bool,
    format_r32float_no_filter: bool,
    format_r32float_all: bool,
    format_rgba8_srgb_all: bool,
    format_rgba8_srgb_no_write: bool,
    format_rgb10a2_unorm_all: bool,
    format_rgb10a2_unorm_no_write: bool,
    format_rgb10a2_uint_write: bool,
    format_rg11b10_all: bool,
    format_rg11b10_no_write: bool,
    format_rgb9e5_all: bool,
    format_rgb9e5_no_write: bool,
    format_rgb9e5_filter_only: bool,
    format_rg32_color: bool,
    format_rg32_color_write: bool,
    format_rg32float_all: bool,
    format_rg32float_color_blend: bool,
    format_rg32float_no_filter: bool,
    format_rgba32int_color: bool,
    format_rgba32int_color_write: bool,
    format_rgba32float_color: bool,
    format_rgba32float_color_write: bool,
    format_rgba32float_all: bool,
    format_depth16unorm: bool,
    format_depth16unorm_filter: bool,
    format_depth32float_filter: bool,
    format_depth32float_none: bool,
    format_bgr10a2_all: bool,
    format_bgr10a2_no_write: bool,
    // --- Numeric limits ---
    max_textures_per_stage: (ResourceIndex, ResourceIndex),
    max_binding_array_elements: ResourceIndex,
    max_sampler_binding_array_elements: ResourceIndex,
    buffer_alignment: u64,
    constant_buffer_offset_alignment: u32,
    max_buffer_size: u64,
    max_texture_size: u64,
    max_texture_3d_size: u64,
    max_texture_layers: u64,
    max_fragment_input_components: u64,
    max_color_render_targets: u8,
    max_color_attachment_bytes_per_sample: u8,
    max_inter_stage_shader_variables: u32,
    max_threads_per_group: u32,
    max_total_threadgroup_memory: u32,
    sample_count_mask: crate::TextureFormatCapabilities,
    // --- Optional feature support ---
    supports_debug_markers: bool,
    supports_binary_archives: bool,
    supports_arrays_of_textures: bool,
    supports_arrays_of_textures_write: bool,
    supports_depth_clip_control: bool,
    supports_shader_primitive_index: bool,
    has_unified_memory: Option<bool>,
    timestamp_query_support: TimestampQuerySupport,
    supports_simd_scoped_operations: bool,
    supports_cooperative_matrix: bool,
    int64: bool,
    int64_atomics_min_max: bool,
    int64_atomics: bool,
    float_atomics: bool,
    mesh_shaders: bool,
    max_task_workgroup_count: u32,
    max_mesh_workgroup_count: u32,
    max_task_payload_size: u32,
    supported_vertex_amplification_factor: u32,
    shader_barycentrics: bool,
    supports_memoryless_storage: bool,
    supports_raytracing: bool,
    shader_per_vertex: bool,
    supports_multisample_array: bool,
}
336
/// Subset of the queried capabilities that is kept on [`AdapterShared`].
///
/// Produced by `CapabilitiesQuery::private_capabilities` and logged at debug level when
/// the adapter is created.
#[derive(Debug)]
struct PrivateCapabilities {
    msl_version: MTLLanguageVersion,
    // `low_power` and `headless` are the adapter sort key in `enumerate_adapters`.
    low_power: bool,
    headless: bool,
    has_unified_memory: Option<bool>,
    timestamp_query_support: TimestampQuerySupport,
    // Reported to users as `AdapterInfo::transient_saves_memory`.
    supports_memoryless_storage: bool,
    mesh_shaders: bool,
}
347
/// Texture-format related subset of the queried capabilities that is kept on
/// [`AdapterShared`].
///
/// Produced by `CapabilitiesQuery::private_texture_format_capabilities` and logged at
/// debug level when the adapter is created. Every field has a same-named counterpart in
/// [`CapabilitiesQuery`].
#[derive(Debug)]
struct PrivateTextureFormatCapabilities {
    read_write_texture_tier: MTLReadWriteTextureTier,
    sample_count_mask: crate::TextureFormatCapabilities,
    int64_atomics: bool,
    msaa_desktop: bool,
    msaa_apple3: bool,
    msaa_apple7: bool,
    format_r32float_all: bool,
    format_rgba8_srgb_all: bool,
    format_rgb10a2_uint_write: bool,
    format_rgb10a2_unorm_all: bool,
    format_rg11b10_all: bool,
    format_rg32float_all: bool,
    format_rgba32float_all: bool,
    format_depth16unorm: bool,
    format_depth16unorm_filter: bool,
    format_depth32float_filter: bool,
    format_depth24_stencil8: bool,
    format_bc: bool,
    format_eac_etc: bool,
    format_astc: bool,
    format_astc_hdr: bool,
}
372
/// Known driver/hardware defects of a device that the backend has to work around.
///
/// Constructed from the device via `PrivateDisabilities::new(&device)`.
#[derive(Clone, Debug)]
struct PrivateDisabilities {
    /// Near depth is not respected properly on some Intel GPUs.
    broken_viewport_near_depth: bool,
    /// Multi-target clears don't appear to work properly on Intel GPUs.
    // NOTE(review): marked `dead_code`, so presumably recorded but not read yet — confirm.
    #[allow(dead_code)]
    broken_layered_clear_image: bool,
}
381
/// Backend tuning knobs stored on the shared adapter state.
#[derive(Debug)]
struct Settings {
    // NOTE(review): presumably selects between retained and unretained-reference command
    // buffers when encoding — the consumer is not visible here; confirm.
    retain_command_buffer_references: bool,
}

impl Default for Settings {
    /// Defaults to retaining command buffer references.
    fn default() -> Self {
        let retain_command_buffer_references = true;
        Self {
            retain_command_buffer_references,
        }
    }
}
394
/// State shared (behind an `Arc`) between an [`Adapter`] and the [`Device`]s created
/// from it.
#[derive(Debug)]
struct AdapterShared {
    // The underlying Metal device.
    device: Retained<ProtocolObject<dyn MTLDevice>>,
    // Known defects to work around, derived from `device`.
    disabilities: PrivateDisabilities,
    // Capability subsets derived from the `CapabilitiesQuery`.
    private_caps: PrivateCapabilities,
    private_texture_format_caps: PrivateTextureFormatCapabilities,
    // Always `Settings::default()` at construction.
    settings: Settings,
    presentation_timer: time::PresentationTimer,
}

// Compile-time check that the shared state can cross threads when `send_sync` is enabled.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(AdapterShared: Send, Sync);
407
408impl AdapterShared {
409    fn new(
410        device: Retained<ProtocolObject<dyn MTLDevice>>,
411        capabilities_query: &CapabilitiesQuery,
412    ) -> Self {
413        let private_caps = capabilities_query.private_capabilities();
414        let private_texture_format_caps = capabilities_query.private_texture_format_capabilities();
415        log::debug!("{private_caps:#?}");
416        log::debug!("{private_texture_format_caps:#?}");
417
418        Self {
419            disabilities: PrivateDisabilities::new(&device),
420            private_caps,
421            private_texture_format_caps,
422            device,
423            settings: Settings::default(),
424            presentation_timer: time::PresentationTimer::new(),
425        }
426    }
427
428    fn expose(
429        device: Retained<ProtocolObject<dyn MTLDevice>>,
430        instance_flags: wgt::InstanceFlags,
431    ) -> crate::ExposedAdapter<Api> {
432        autoreleasepool(|_| {
433            let name = device.name().to_string();
434            let capabilities_query = CapabilitiesQuery::new(&device);
435            let shared = AdapterShared::new(device, &capabilities_query);
436            let features = capabilities_query.features();
437            let capabilities = capabilities_query.capabilities(instance_flags);
438            crate::ExposedAdapter {
439                info: wgt::AdapterInfo {
440                    name,
441                    // These are hardcoded based on typical values for Metal devices
442                    //
443                    // See <https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md#adapter-info>
444                    // for more information.
445                    subgroup_min_size: 4,
446                    subgroup_max_size: 64,
447                    transient_saves_memory: Some(shared.private_caps.supports_memoryless_storage),
448                    ..wgt::AdapterInfo::new(shared.private_caps.device_type(), wgt::Backend::Metal)
449                },
450                features,
451                capabilities,
452                adapter: Adapter::new(Arc::new(shared)),
453            }
454        })
455    }
456}
457
/// Metal adapter: a thin handle around the reference-counted [`AdapterShared`] state.
#[derive(Debug)]
pub struct Adapter {
    shared: Arc<AdapterShared>,
}

// Compile-time check that the adapter can cross threads when `send_sync` is enabled.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(Adapter: Send, Sync);
465
/// Metal queue: a handle to the reference-counted [`QueueShared`] state plus the
/// timestamp period reported by `get_timestamp_period`.
#[derive(Debug)]
pub struct Queue {
    shared: Arc<QueueShared>,
    // Returned verbatim from `get_timestamp_period`.
    timestamp_period: f32,
}

// Compile-time check that the queue can cross threads when `send_sync` is enabled.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(Queue: Send, Sync);
474
475impl Queue {
476    pub unsafe fn queue_from_raw(
477        raw: Retained<ProtocolObject<dyn MTLCommandQueue>>,
478        timestamp_period: f32,
479    ) -> Self {
480        Self {
481            shared: Arc::new(QueueShared {
482                raw,
483                command_buffer_created_not_submitted: atomic::AtomicUsize::new(0),
484                pending_waits: Mutex::new(Vec::new()),
485                pending_signals: Mutex::new(Vec::new()),
486                relay: OnceLock::new(),
487            }),
488            timestamp_period,
489        }
490    }
491
492    pub fn as_raw(&self) -> &ProtocolObject<dyn MTLCommandQueue> {
493        &self.shared.raw
494    }
495
496    /// Enable strict GPU-side ordering for [`Self::add_wait_event`].
497    ///
498    /// By default, `add_wait_event` encodes the wait on a separate
499    /// internal command buffer. Metal allows independent command
500    /// buffers in a queue to overlap on the GPU, so a wait CB does
501    /// not strictly gate subsequent user command buffers when those
502    /// CBs share no Metal-tracked resources with it. Single-stream
503    /// pipelines often serialize anyway because the GPU has no other
504    /// concurrent work to fill the slot, but mixed workloads (decode
505    /// + compute + render) can race.
506    ///
507    /// When enabled, every [`crate::CommandEncoder::begin_encoding`]
508    /// pre-encodes a wait on an internal `MTLSharedEvent` at the start
509    /// of the new command buffer; every [`crate::Queue::submit`] then
510    /// signals that event after draining the staged external waits.
511    /// All command buffers since the previous submit are released in
512    /// lockstep once the foreign signals arrive, regardless of GPU
513    /// concurrency.
514    ///
515    /// Costs one extra `encodeWaitForEvent` per command buffer plus
516    /// one extra internal command buffer per submit on this queue.
517    /// Other queues are unaffected.
518    ///
519    /// Idempotent. Cannot be disabled - once enabled, the queue stays
520    /// in strict mode for its lifetime, since command buffers already
521    /// encoded would be stranded if the relay stopped firing.
522    pub fn enable_strict_event_sync(&self) -> Result<(), crate::DeviceError> {
523        if self.shared.relay.get().is_some() {
524            return Ok(());
525        }
526        let event = self
527            .shared
528            .raw
529            .device()
530            .newSharedEvent()
531            .ok_or(crate::DeviceError::OutOfMemory)?;
532        let _ = self.shared.relay.set(Relay {
533            event,
534            next_release_value: atomic::AtomicU64::new(1),
535            commit_lock: Mutex::new(()),
536        });
537        Ok(())
538    }
539
540    /// Stage an `MTLCommandBuffer::encodeWaitForEvent(event, value)` for
541    /// the next [`crate::Queue::submit`]. Lets external producers be waited
542    /// on without a CPU block.
543    ///
544    /// By default the wait is encoded onto a dedicated internal command
545    /// buffer committed before the submit's user CBs - best-effort under
546    /// cross-CB GPU concurrency, see [`Self::enable_strict_event_sync`]
547    /// for strict gating. With strict mode enabled, the wait is chained
548    /// through an internal relay event that gates every user command
549    /// buffer encoded since the previous submit.
550    ///
551    /// Staging is queue-wide, not per-thread or per-submit: any
552    /// `add_wait_event` call is consumed by whichever
553    /// [`crate::Queue::submit`] runs next on this queue. If you stage
554    /// events from multiple threads, coordinate the staging and the
555    /// submit yourself, or another thread's submit may drain your
556    /// pending waits.
557    pub fn add_wait_event(&self, event: Retained<ProtocolObject<dyn MTLSharedEvent>>, value: u64) {
558        self.shared.pending_waits.lock().push((event, value));
559    }
560
561    /// Remove `event` from the pending wait list if it is still present.
562    /// Returns `true` if it was found and removed.
563    pub fn remove_wait_event(&self, event: &ProtocolObject<dyn MTLSharedEvent>) -> bool {
564        let target: *const ProtocolObject<dyn MTLSharedEvent> = event;
565        let mut waits = self.shared.pending_waits.lock();
566        let before = waits.len();
567        waits.retain(|(e, _)| Retained::as_ptr(e) != target);
568        waits.len() != before
569    }
570
571    /// Stage an `MTLCommandBuffer::encodeSignalEvent(event, value)` for
572    /// the next [`crate::Queue::submit`]. The signal is encoded after
573    /// the submit's own completion signal, so a foreign API waiting on
574    /// `(event, value)` observes the wgpu work as done.
575    ///
576    /// Staging is queue-wide, not per-thread or per-submit: see
577    /// [`Self::add_wait_event`] for the threading caveat.
578    pub fn add_signal_event(
579        &self,
580        event: Retained<ProtocolObject<dyn MTLSharedEvent>>,
581        value: u64,
582    ) {
583        self.shared.pending_signals.lock().push((event, value));
584    }
585
586    /// Remove `event` from the pending signal list if it is still present.
587    /// Returns `true` if it was found and removed.
588    pub fn remove_signal_event(&self, event: &ProtocolObject<dyn MTLSharedEvent>) -> bool {
589        let target: *const ProtocolObject<dyn MTLSharedEvent> = event;
590        let mut signals = self.shared.pending_signals.lock();
591        let before = signals.len();
592        signals.retain(|(e, _)| Retained::as_ptr(e) != target);
593        signals.len() != before
594    }
595}
596
// Mutex-guarded list of `(shared event, value)` pairs staged for the next submit; used
// for both the pending waits and the pending signals of a queue.
type PendingEvents = Mutex<Vec<(Retained<ProtocolObject<dyn MTLSharedEvent>>, u64)>>;
598
/// Internal relay used by [`Queue::enable_strict_event_sync`] to chain
/// staged waits across all CBs in a submit.
///
/// `begin_encoding` reads `next_release_value` and pre-encodes
/// `encodeWaitForEvent(event, expected)` at the start of each CB.
/// `submit` claims the value via `fetch_add`, encodes the foreign
/// waits + `encodeSignalEvent(event, claimed)` on a wait CB, and
/// commits it. `commit_lock` serializes the claim+commit pair so
/// concurrent submits land their signals in monotonic *commit* order
/// on the CPU. GPU-side execution of the resulting wait CBs may still
/// reorder under concurrency; see the comment in `submit` for why
/// that's harmless.
#[derive(Debug)]
struct Relay {
    // Shared event that user command buffers wait on and that `submit` signals.
    event: Retained<ProtocolObject<dyn MTLSharedEvent>>,
    // Value the next submit will signal; initialized to 1 when strict mode is enabled.
    next_release_value: atomic::AtomicU64,
    // Held by `submit` across the `fetch_add` and the wait CB's `commit`.
    commit_lock: Mutex<()>,
}
617
/// Queue state shared behind an `Arc` by [`Queue`].
#[derive(Debug)]
pub struct QueueShared {
    // The underlying Metal command queue.
    raw: Retained<ProtocolObject<dyn MTLCommandQueue>>,
    // Tracks command buffers created via `CommandEncoder::begin_encoding` that
    // have not yet been submitted or discarded. Used to proactively fail
    // before hitting Metal's `maxCommandBufferCount`.
    //
    // (In a few places we call `.commandBuffer{,WithUnretainedReferences}` directly
    // to create command buffers for internal purposes. In those cases we always
    // commit the buffer immediately, so we don't adjust the counter for them.)
    command_buffer_created_not_submitted: atomic::AtomicUsize,
    // Waits staged by `Queue::add_wait_event`, drained by the next `submit`.
    pending_waits: PendingEvents,
    // Signals staged by `Queue::add_signal_event`, drained by the next `submit`.
    pending_signals: PendingEvents,
    // Set once by `Queue::enable_strict_event_sync`; empty while strict mode is off.
    relay: OnceLock<Relay>,
}
633
/// Metal logical device.
///
/// It holds a reference to the adapter's shared state together with per-device
/// data.
#[derive(Debug)]
pub struct Device {
    shared: Arc<AdapterShared>,
    // NOTE(review): presumably the features and limits the device was opened with —
    // the constructor is not visible here; confirm.
    features: wgt::Features,
    counters: Arc<wgt::HalCounters>,
    limits: wgt::Limits,
}
641
/// Presentation surface backed by a `CAMetalLayer`.
///
/// All fields are wrapped in locks, so they can be updated through a shared reference.
#[derive(Debug)]
pub struct Surface {
    render_layer: Mutex<Retained<CAMetalLayer>>,
    // NOTE(review): presumably `None` until the surface has been configured — confirm.
    swapchain_format: RwLock<Option<wgt::TextureFormat>>,
    extent: RwLock<wgt::Extent3d>,
}

// SAFETY: NOTE(review): no justification is given in this file. Every field is guarded by
// a `Mutex`/`RwLock`; presumably the remaining requirement is that the retained
// `CAMetalLayer` may be used from any thread — confirm.
unsafe impl Send for Surface {}
unsafe impl Sync for Surface {}
651
/// A texture acquired from a [`Surface`], together with the drawable needed to present it.
#[derive(Debug)]
pub struct SurfaceTexture {
    texture: Texture,
    // Useful for UI-intensive applications that are sensitive to
    // window resizing.
    // NOTE(review): the sentence above reads as if it describes
    // `present_with_transaction` rather than `drawable` — confirm and move if so.
    drawable: Retained<ProtocolObject<dyn MTLDrawable>>,
    // When set, `Queue::present` waits until the command buffer is scheduled and then
    // presents the drawable directly instead of encoding `presentDrawable`.
    present_with_transaction: bool,
}
660
impl crate::DynSurfaceTexture for SurfaceTexture {}

// Lets a `SurfaceTexture` be used wherever a borrowed `Texture` is expected.
impl core::borrow::Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same as above, but for the type-erased texture trait object.
impl core::borrow::Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

// SAFETY: NOTE(review): no justification is given in this file; presumably relies on the
// contained Metal texture and drawable being usable from any thread — confirm.
unsafe impl Send for SurfaceTexture {}
unsafe impl Sync for SurfaceTexture {}
677
678impl crate::Queue for Queue {
679    type A = Api;
680
681    unsafe fn submit(
682        &self,
683        command_buffers: &[&CommandBuffer],
684        _surface_textures: &[&SurfaceTexture],
685        (signal_fence, signal_value): (&Fence, crate::FenceValue),
686    ) -> Result<(), crate::DeviceError> {
687        autoreleasepool(|_| {
688            // Drain caller-staged waits onto a dedicated command buffer
689            // committed before the user CBs.
690            //
691            // When strict event sync is enabled, this CB also signals
692            // the relay event to release every user CB encoded since
693            // the previous submit (see `Queue::enable_strict_event_sync`).
694            // The `commit_lock` is held across `fetch_add` + `commit` so
695            // concurrent submits land their relay signals in monotonic
696            // *commit* order on the CPU side; otherwise a later-claimed
697            // signal could commit first and the subsequent backward
698            // signal would temporarily regress the relay's signaledValue.
699            //
700            // GPU-side ordering across independent wait CBs remains
701            // best-effort: Metal may run them in parallel, so a wait CB
702            // with foreign waits can fire its signal after a later
703            // submit's wait-free signal. CBs already released stay
704            // released (`MTLSharedEvent` waits are `>=`), and future
705            // submits' signals catch the value back up, so the regression
706            // is harmless - but users wanting strict GPU-side ordering
707            // across concurrent submits must serialize submits themselves.
708            //
709            // Without strict mode, we only emit a wait CB when there are
710            // pending waits - keeps the common-case submit overhead-free.
711            {
712                let relay = self.shared.relay.get();
713                let mut waits = self.shared.pending_waits.lock();
714                if relay.is_some() || !waits.is_empty() {
715                    let _commit_guard = relay.map(|r| r.commit_lock.lock());
716                    // We do not bother adjusting `command_buffer_created_not_submitted`
717                    // because we immediately commit this buffer.
718                    let wait_cb = self
719                        .shared
720                        .raw
721                        .commandBufferWithUnretainedReferences()
722                        .ok_or(crate::DeviceError::Lost)?;
723                    wait_cb.setLabel(Some(ns_string!("(wgpu internal) Wait")));
724                    for (event, value) in waits.drain(..) {
725                        wait_cb.encodeWaitForEvent_value(event.as_ref(), value);
726                    }
727                    if let Some(relay) = relay {
728                        let release = relay
729                            .next_release_value
730                            .fetch_add(1, atomic::Ordering::AcqRel);
731                        wait_cb.encodeSignalEvent_value(relay.event.as_ref(), release);
732                    }
733                    wait_cb.commit();
734                }
735            }
736
737            let extra_command_buffer = {
738                let fence_sync = Arc::clone(&signal_fence.sync);
739                let block = block2::RcBlock::new(move |_cmd_buf| {
740                    *fence_sync.0.lock() = signal_value;
741                    fence_sync.1.notify_all();
742                });
743
744                let raw = match command_buffers.last() {
745                    Some(&cmd_buf) => cmd_buf.raw.clone(),
746                    None => {
747                        // We do not bother adjusting `command_buffer_created_not_submitted`
748                        // because we immediately commit this buffer.
749                        self.shared
750                            .raw
751                            .commandBufferWithUnretainedReferences()
752                            .ok_or(crate::DeviceError::Lost)?
753                    }
754                };
755                raw.setLabel(Some(ns_string!("(wgpu internal) Signal")));
756                unsafe { raw.addCompletedHandler(block2::RcBlock::as_ptr(&block)) };
757
758                signal_fence.maintain();
759                signal_fence
760                    .pending_command_buffers
761                    .write()
762                    .push((signal_value, raw.clone()));
763
764                if let Some(shared_event) = &signal_fence.shared_event {
765                    raw.encodeSignalEvent_value(shared_event.as_ref(), signal_value);
766                }
767
768                // Drain caller-staged signals after our own signal so each
769                // additional event value publishes once the submit completes.
770                {
771                    let mut signals = self.shared.pending_signals.lock();
772                    for (event, value) in signals.drain(..) {
773                        raw.encodeSignalEvent_value(event.as_ref(), value);
774                    }
775                }
776
777                // only return an extra one if it's extra
778                match command_buffers.last() {
779                    Some(_) => None,
780                    None => Some(raw),
781                }
782            };
783
784            for cmd_buffer in command_buffers {
785                cmd_buffer.raw.commit();
786                // One command buffer per `end_encoding` call moves from the
787                // "created but not yet submitted" bucket into the submitted
788                // set, so update the counter.
789                let previous = self
790                    .shared
791                    .command_buffer_created_not_submitted
792                    .fetch_sub(1, atomic::Ordering::AcqRel);
793                debug_assert!(previous > 0);
794            }
795
796            if let Some(raw) = extra_command_buffer {
797                raw.commit();
798            }
799            Ok(())
800        })
801    }
802    unsafe fn present(
803        &self,
804        _surface: &Surface,
805        texture: SurfaceTexture,
806    ) -> Result<(), crate::SurfaceError> {
807        autoreleasepool(|_| {
808            // We do not bother adjusting `command_buffer_created_not_submitted`
809            // because we immediately commit this buffer.
810            let command_buffer = self.shared.raw.commandBuffer().unwrap();
811            command_buffer.setLabel(Some(ns_string!("(wgpu internal) Present")));
812
813            // https://developer.apple.com/documentation/quartzcore/cametallayer/1478157-presentswithtransaction?language=objc
814            if !texture.present_with_transaction {
815                command_buffer.presentDrawable(&texture.drawable);
816            }
817
818            command_buffer.commit();
819
820            if texture.present_with_transaction {
821                command_buffer.waitUntilScheduled();
822                texture.drawable.present();
823            }
824        });
825        Ok(())
826    }
827
    /// Returns the timestamp period stored on this queue (`self.timestamp_period`).
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.timestamp_period
    }
831
832    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
833        autoreleasepool(|_| {
834            let command_buffer = self.shared.raw.commandBuffer().unwrap();
835            command_buffer.setLabel(Some(ns_string!("(wgpu internal) wait_for_idle")));
836            command_buffer.commit();
837            command_buffer.waitUntilCompleted();
838        });
839        Ok(())
840    }
841}
842
/// A retained `MTLBuffer` together with its recorded size.
#[derive(Debug)]
pub struct Buffer {
    /// Retained handle to the underlying Metal buffer.
    raw: Retained<ProtocolObject<dyn MTLBuffer>>,
    /// Size of the buffer; `BufferBinding::resolve_size` subtracts a binding
    /// offset from it, so presumably a byte count.
    size: wgt::BufferAddress,
}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// object being usable from any thread — confirm.
unsafe impl Send for Buffer {}
unsafe impl Sync for Buffer {}

impl crate::DynBuffer for Buffer {}
853
854impl Buffer {
855    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLBuffer>> {
856        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
857    }
858}
859
860impl crate::BufferBinding<'_, Buffer> {
861    fn resolve_size(&self) -> wgt::BufferAddress {
862        match self.size {
863            Some(size) => size.get(),
864            None => self.buffer.size - self.offset,
865        }
866    }
867}
868
/// A retained `MTLTexture` plus the metadata recorded alongside it.
#[derive(Debug)]
pub struct Texture {
    /// Retained handle to the underlying Metal texture.
    raw: Retained<ProtocolObject<dyn MTLTexture>>,
    format: wgt::TextureFormat,
    raw_type: MTLTextureType,
    array_layers: u32,
    mip_levels: u32,
    copy_size: crate::CopyExtent,

    // The `_drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    _drop_guard: Option<crate::DropGuard>,
}

impl Texture {
    /// Borrows the underlying `MTLTexture` object.
    pub fn raw_handle(&self) -> &ProtocolObject<dyn MTLTexture> {
        &self.raw
    }
}

impl crate::DynTexture for Texture {}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// object being usable from any thread — confirm.
unsafe impl Send for Texture {}
unsafe impl Sync for Texture {}
893
/// A texture view, stored as a retained `MTLTexture` plus its format aspects.
#[derive(Debug)]
pub struct TextureView {
    /// Retained handle to the Metal texture object backing this view.
    raw: Retained<ProtocolObject<dyn MTLTexture>>,
    aspects: crate::FormatAspects,
}

impl crate::DynTextureView for TextureView {}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// object being usable from any thread — confirm.
unsafe impl Send for TextureView {}
unsafe impl Sync for TextureView {}
904
905impl TextureView {
906    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLTexture>> {
907        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
908    }
909}
910
/// A retained `MTLSamplerState`.
#[derive(Debug)]
pub struct Sampler {
    /// Retained handle to the underlying Metal sampler state.
    raw: Retained<ProtocolObject<dyn MTLSamplerState>>,
}

impl crate::DynSampler for Sampler {}

// Compile-time check that `Sampler` is `Send + Sync` when the `send_sync` cfg is set.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(Sampler: Send, Sync);
920
921impl Sampler {
922    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLSamplerState>> {
923        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
924    }
925}
926
/// A bind group layout, kept as a shared slice of its entries.
#[derive(Debug)]
pub struct BindGroupLayout {
    /// Sorted list of BGL entries.
    entries: Arc<[wgt::BindGroupLayoutEntry]>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
934
/// One `T` for each kind of native resource table: buffers, textures and samplers.
#[derive(Clone, Debug, Default)]
struct ResourceData<T> {
    buffers: T,
    textures: T,
    samplers: T,
}
941
/// One `T` per shader stage handled by this backend.
///
/// The fields correspond to the vertex, fragment, compute, task and mesh
/// stages respectively (see `NAGA_STAGES`).
#[derive(Clone, Debug, Default)]
struct MultiStageData<T> {
    vs: T,
    fs: T,
    cs: T,
    ts: T,
    ms: T,
}
950
/// Maps each `MultiStageData` slot to the `naga::ShaderStage` it stands for.
const NAGA_STAGES: MultiStageData<naga::ShaderStage> = MultiStageData {
    vs: naga::ShaderStage::Vertex,
    fs: naga::ShaderStage::Fragment,
    cs: naga::ShaderStage::Compute,
    ts: naga::ShaderStage::Task,
    ms: naga::ShaderStage::Mesh,
};
958
959impl<T> ops::Index<naga::ShaderStage> for MultiStageData<T> {
960    type Output = T;
961    fn index(&self, stage: naga::ShaderStage) -> &T {
962        match stage {
963            naga::ShaderStage::Vertex => &self.vs,
964            naga::ShaderStage::Fragment => &self.fs,
965            naga::ShaderStage::Compute => &self.cs,
966            naga::ShaderStage::Task => &self.ts,
967            naga::ShaderStage::Mesh => &self.ms,
968            naga::ShaderStage::RayGeneration
969            | naga::ShaderStage::AnyHit
970            | naga::ShaderStage::ClosestHit
971            | naga::ShaderStage::Miss => unimplemented!(),
972        }
973    }
974}
975
976impl<T> MultiStageData<T> {
977    fn map_ref<Y>(&self, fun: impl Fn(&T) -> Y) -> MultiStageData<Y> {
978        MultiStageData {
979            vs: fun(&self.vs),
980            fs: fun(&self.fs),
981            cs: fun(&self.cs),
982            ts: fun(&self.ts),
983            ms: fun(&self.ms),
984        }
985    }
986    fn map<Y>(self, fun: impl Fn(T) -> Y) -> MultiStageData<Y> {
987        MultiStageData {
988            vs: fun(self.vs),
989            fs: fun(self.fs),
990            cs: fun(self.cs),
991            ts: fun(self.ts),
992            ms: fun(self.ms),
993        }
994    }
995    fn iter<'a>(&'a self) -> impl Iterator<Item = &'a T> {
996        iter::once(&self.vs)
997            .chain(iter::once(&self.fs))
998            .chain(iter::once(&self.cs))
999            .chain(iter::once(&self.ts))
1000            .chain(iter::once(&self.ms))
1001    }
1002    fn iter_mut<'a>(&'a mut self) -> impl Iterator<Item = &'a mut T> {
1003        iter::once(&mut self.vs)
1004            .chain(iter::once(&mut self.fs))
1005            .chain(iter::once(&mut self.cs))
1006            .chain(iter::once(&mut self.ts))
1007            .chain(iter::once(&mut self.ms))
1008    }
1009}
1010
/// Per-stage resource indices, one for each of the buffer, texture and sampler tables.
type MultiStageResourceCounters = MultiStageData<ResourceData<ResourceIndex>>;
/// Per-stage naga MSL entry point resources.
type MultiStageResources = MultiStageData<naga::back::msl::EntryPointResources>;
1013
/// Per-bind-group data stored in a `PipelineLayout`.
#[derive(Debug)]
struct BindGroupLayoutInfo {
    /// Presumably the first resource index of this group in each stage's
    /// buffer/texture/sampler tables — TODO confirm against the layout builder.
    base_resource_indices: MultiStageResourceCounters,
}
1018
/// Describes the immediates of one shader stage: a count and a buffer index.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct ImmediateDataInfo {
    // NOTE(review): unit of `count` (words vs. bytes) is not visible here — confirm.
    count: u32,
    buffer_index: ResourceIndex,
}
1024
/// Metal-side pipeline layout data: per-group and per-stage binding information.
#[derive(Debug)]
pub struct PipelineLayout {
    /// One optional entry per bind group slot, up to `crate::MAX_BIND_GROUPS`.
    bind_group_infos: [Option<BindGroupLayoutInfo>; crate::MAX_BIND_GROUPS],
    /// Immediates information for each stage, if that stage has any.
    immediates_infos: MultiStageData<Option<ImmediateDataInfo>>,
    total_immediates: u32,
    /// naga MSL entry point resources for each stage.
    per_stage_map: MultiStageResources,
    /// Keyed by resource binding; presumably the length of each binding array — TODO confirm.
    binding_array_length_map: FastHashMap<naga::ResourceBinding, u32>,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1035
/// A bind group entry of one of three kinds: a plain buffer, a storage
/// binding array, or an acceleration structure.
#[derive(Debug)]
enum BufferLikeResource {
    /// A single buffer binding.
    Buffer {
        ptr: NonNull<ProtocolObject<dyn MTLBuffer>>,
        offset: wgt::BufferAddress,
        dynamic_index: Option<u32>,

        /// The buffer's size, if it is a [`Storage`] binding. Otherwise `None`.
        ///
        /// Buffers with the [`wgt::BufferBindingType::Storage`] binding type can
        /// hold WGSL runtime-sized arrays. When one does, we must pass its size to
        /// shader entry points to implement bounds checks and WGSL's `arrayLength`
        /// function. See `device::CompiledShader::sized_bindings` for details.
        ///
        /// [`Storage`]: wgt::BufferBindingType::Storage
        binding_size: Option<wgt::BufferSize>,

        binding_location: u32,
    },

    /// Bindless storage `binding_array`: one argument [`MTLBuffer`] (pointer table) plus element
    /// byte sizes `(array index, size)` for `_buffer_sizes` / runtime-sized arrays.
    ///
    /// [`MTLBuffer`]: objc2_metal::MTLBuffer
    StorageBindingArray {
        ptr: NonNull<ProtocolObject<dyn MTLBuffer>>,
        array_element_sizes: Vec<(u32, wgt::BufferSize)>,
        binding_location: u32,
    },
    /// A raw pointer to an `MTLAccelerationStructure`.
    AccelerationStructure(NonNull<ProtocolObject<dyn MTLAccelerationStructure>>),
}
1067
/// Usage information recorded for a resource in `BindGroup::resources_to_use`.
#[derive(Debug)]
struct UseResourceInfo {
    uses: MTLResourceUsage,
    stages: MTLRenderStages,
    visible_in_compute: bool,
}

impl Default for UseResourceInfo {
    /// Starts with empty usage and stage sets and `visible_in_compute` unset.
    fn default() -> Self {
        Self {
            uses: MTLResourceUsage::empty(),
            stages: MTLRenderStages::empty(),
            visible_in_compute: false,
        }
    }
}
1084
/// A bind group: raw pointers to its buffers, samplers and textures, plus
/// retained argument buffers and per-resource usage information.
#[derive(Debug, Default)]
pub struct BindGroup {
    counters: MultiStageResourceCounters,
    buffers: Vec<BufferLikeResource>,
    samplers: Vec<NonNull<ProtocolObject<dyn MTLSamplerState>>>,
    textures: Vec<NonNull<ProtocolObject<dyn MTLTexture>>>,

    /// Argument buffers retained (kept alive) by this bind group.
    argument_buffers: Vec<Retained<ProtocolObject<dyn MTLBuffer>>>,
    /// Usage information keyed by raw resource pointer.
    resources_to_use: HashMap<NonNull<ProtocolObject<dyn MTLResource>>, UseResourceInfo>,
}

impl crate::DynBindGroup for BindGroup {}

// NOTE(review): manual `Send`/`Sync` over raw `NonNull` pointers; soundness
// depends on the pointed-to Metal objects outliving the bind group and being
// usable from any thread — confirm.
unsafe impl Send for BindGroup {}
unsafe impl Sync for BindGroup {}
1100
/// Where a shader module's code comes from.
#[derive(Debug)]
pub enum ShaderModuleSource {
    /// A naga shader.
    Naga(crate::NagaShader),
    /// An already-built Metal library (see `PassthroughShader`).
    Passthrough(PassthroughShader),
}
1106
/// A shader supplied as a Metal library rather than as a naga module.
#[derive(Debug)]
pub struct PassthroughShader {
    /// The retained Metal library.
    pub library: Retained<ProtocolObject<dyn MTLLibrary>>,
    /// A `(u32, u32, u32)` triple per name; presumably keyed by entry point
    /// name — TODO confirm what the triple represents at the use site.
    pub num_workgroups: HashMap<String, (u32, u32, u32)>,
}

// Compile-time check that `PassthroughShader` is `Send + Sync` when the `send_sync` cfg is set.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(PassthroughShader: Send, Sync);
1115
/// A shader module: its source plus the runtime checks configured for it.
#[derive(Debug)]
pub struct ShaderModule {
    source: ShaderModuleSource,
    runtime_checks: wgt::ShaderRuntimeChecks,
}

impl crate::DynShaderModule for ShaderModule {}
1123
/// Per-stage information stored with a pipeline and mirrored into
/// `CommandState::stage_infos`.
#[derive(Debug)]
struct PipelineStageInfo {
    /// The stage's Metal library, if any. Never read (hence `dead_code`);
    /// presumably held only to keep the library alive — TODO confirm.
    #[allow(dead_code)]
    library: Option<Retained<ProtocolObject<dyn MTLLibrary>>>,
    /// Immediates information for this stage, if it has any.
    immediates: Option<ImmediateDataInfo>,

    /// The buffer argument table index at which we pass runtime-sized arrays' buffer sizes.
    ///
    /// See `device::CompiledShader::sized_bindings` for more details.
    sizes_slot: Option<naga::back::msl::Slot>,

    /// Bindings of all WGSL `storage` globals that contain runtime-sized arrays.
    ///
    /// See `device::CompiledShader::sized_bindings` for more details.
    sized_bindings: Vec<(naga::ResourceBinding, u32)>,

    /// Info on all bound vertex buffers.
    vertex_buffer_mappings: Vec<naga::back::msl::VertexBufferMapping>,

    /// The workgroup size for compute, task or mesh stages.
    raw_wg_size: MTLSize,

    /// The workgroup memory sizes for compute, task or mesh stages.
    work_group_memory_sizes: Vec<u32>,
}
1149
1150// TODO(madsmtm): Derive this when a release with
1151// https://github.com/madsmtm/objc2/issues/804 is available (likely 0.4).
1152impl Default for PipelineStageInfo {
1153    fn default() -> Self {
1154        Self {
1155            library: Default::default(),
1156            immediates: Default::default(),
1157            sizes_slot: Default::default(),
1158            sized_bindings: Default::default(),
1159            vertex_buffer_mappings: Default::default(),
1160            raw_wg_size: MTLSize {
1161                width: 0,
1162                height: 0,
1163                depth: 0,
1164            },
1165            work_group_memory_sizes: Default::default(),
1166        }
1167    }
1168}
1169
1170impl PipelineStageInfo {
1171    fn clear(&mut self) {
1172        self.immediates = None;
1173        self.sizes_slot = None;
1174        self.sized_bindings.clear();
1175        self.vertex_buffer_mappings.clear();
1176        self.library = None;
1177        self.work_group_memory_sizes.clear();
1178        self.raw_wg_size = MTLSize {
1179            width: 0,
1180            height: 0,
1181            depth: 0,
1182        };
1183    }
1184
1185    fn assign_from(&mut self, other: &Self) {
1186        self.immediates = other.immediates;
1187        self.sizes_slot = other.sizes_slot;
1188        self.sized_bindings.clear();
1189        self.sized_bindings.extend_from_slice(&other.sized_bindings);
1190        self.vertex_buffer_mappings.clear();
1191        self.vertex_buffer_mappings
1192            .extend_from_slice(&other.vertex_buffer_mappings);
1193        self.library = Some(other.library.as_ref().unwrap().clone());
1194        self.raw_wg_size = other.raw_wg_size;
1195        self.work_group_memory_sizes.clear();
1196        self.work_group_memory_sizes
1197            .extend_from_slice(&other.work_group_memory_sizes);
1198    }
1199}
1200
/// A render pipeline: the Metal pipeline state, per-stage information, and
/// the raster/depth state stored alongside it.
#[derive(Debug)]
pub struct RenderPipeline {
    /// Retained Metal render pipeline state.
    raw: Retained<ProtocolObject<dyn MTLRenderPipelineState>>,
    /// Stage information for the vertex, fragment, task and mesh stages;
    /// `None` where the pipeline has no such stage information.
    vs_info: Option<PipelineStageInfo>,
    fs_info: Option<PipelineStageInfo>,
    ts_info: Option<PipelineStageInfo>,
    ms_info: Option<PipelineStageInfo>,
    raw_primitive_type: MTLPrimitiveType,
    raw_triangle_fill_mode: MTLTriangleFillMode,
    raw_front_winding: MTLWinding,
    raw_cull_mode: MTLCullMode,
    raw_depth_clip_mode: Option<MTLDepthClipMode>,
    /// Depth-stencil state object paired with its depth bias settings, if any.
    depth_stencil: Option<(
        Retained<ProtocolObject<dyn MTLDepthStencilState>>,
        wgt::DepthBiasState,
    )>,
}

// Compile-time check that `RenderPipeline` is `Send + Sync` when the `send_sync` cfg is set.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(RenderPipeline: Send, Sync);

impl crate::DynRenderPipeline for RenderPipeline {}
1223
/// A compute pipeline: the Metal pipeline state plus its compute stage information.
#[derive(Debug)]
pub struct ComputePipeline {
    /// Retained Metal compute pipeline state.
    raw: Retained<ProtocolObject<dyn MTLComputePipelineState>>,
    cs_info: PipelineStageInfo,
}

// Compile-time check that `ComputePipeline` is `Send + Sync` when the `send_sync` cfg is set.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(ComputePipeline: Send, Sync);

impl crate::DynComputePipeline for ComputePipeline {}
1234
/// Ray tracing pipeline type for this backend; it currently carries no data.
#[derive(Debug)]
pub struct RayTracingPipeline {}

impl crate::DynRayTracingPipeline for RayTracingPipeline {}
1239
/// A query set: a Metal buffer, an optional counter sample buffer, and the query type.
#[derive(Debug, Clone)]
pub struct QuerySet {
    raw_buffer: Retained<ProtocolObject<dyn MTLBuffer>>,
    // Metal has a custom buffer for counters.
    counter_sample_buffer: Option<Retained<ProtocolObject<dyn MTLCounterSampleBuffer>>>,
    ty: wgt::QueryType,
}

impl crate::DynQuerySet for QuerySet {}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// objects being usable from any thread — confirm.
unsafe impl Send for QuerySet {}
unsafe impl Sync for QuerySet {}
1252
/// A fence that tracks a value together with the command buffers still pending on it.
#[derive(Debug)]
pub struct Fence {
    /// A fence value behind a mutex, paired with a condition variable.
    /// `get_latest` reads this value as its starting point.
    sync: Arc<(Mutex<crate::FenceValue>, Condvar)>,
    /// The pending fence values have to be ascending.
    pending_command_buffers: RwLock<Vec<PendingCommandBuffer>>,
    /// Optional Metal shared event, exposed through `raw_shared_event`.
    shared_event: Option<Retained<ProtocolObject<dyn MTLSharedEvent>>>,
}

/// A fence value paired with the command buffer it is attached to.
type PendingCommandBuffer = (
    crate::FenceValue,
    Retained<ProtocolObject<dyn MTLCommandBuffer>>,
);

impl crate::DynFence for Fence {}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// objects being usable from any thread — confirm.
unsafe impl Send for Fence {}
unsafe impl Sync for Fence {}
1270
1271impl Fence {
1272    fn get_latest(&self) -> crate::FenceValue {
1273        let mut max_value = *self.sync.0.lock();
1274        let pending_command_buffers = self.pending_command_buffers.read();
1275        for &(value, ref cmd_buf) in pending_command_buffers.iter() {
1276            match cmd_buf.status() {
1277                MTLCommandBufferStatus::Completed | MTLCommandBufferStatus::Error => {
1278                    max_value = value;
1279                }
1280                _ => {}
1281            }
1282        }
1283        max_value
1284    }
1285
1286    fn maintain(&self) {
1287        let latest = self.get_latest();
1288        self.pending_command_buffers
1289            .write()
1290            .retain(|&(value, _)| value > latest);
1291    }
1292
1293    pub fn raw_shared_event(&self) -> Option<&ProtocolObject<dyn MTLSharedEvent>> {
1294        self.shared_event.as_deref()
1295    }
1296}
1297
/// The index buffer state held in `CommandState::index`.
struct IndexState {
    /// Raw pointer to the index buffer.
    buffer_ptr: NonNull<ProtocolObject<dyn MTLBuffer>>,
    offset: wgt::BufferAddress,
    // NOTE(review): presumably the size of one index — confirm at the use site.
    stride: wgt::BufferAddress,
    raw_type: MTLIndexType,
}
1304
/// Scratch storage owned by a `CommandEncoder`.
#[derive(Default)]
struct Temp {
    // NOTE(review): presumably reused when collecting binding sizes to pass
    // to a shader stage — confirm at the use site.
    binding_sizes: Vec<u32>,
}
1309
// Any state in this struct that may be dirty after an abandoned encoding must
// be reset in `discard_encoding` for possible encoder reuse.
/// Mutable encoding state of a `CommandEncoder`: the optional Metal encoders
/// plus the bound pipeline and resource bookkeeping.
struct CommandState {
    /// Blit encoder, when one is held.
    blit: Option<Retained<ProtocolObject<dyn MTLBlitCommandEncoder>>>,
    /// Acceleration structure encoder, when one is held.
    acceleration_structure_builder:
        Option<Retained<ProtocolObject<dyn MTLAccelerationStructureCommandEncoder>>>,
    /// Render encoder, when one is held.
    render: Option<Retained<ProtocolObject<dyn MTLRenderCommandEncoder>>>,
    /// Compute encoder, when one is held.
    compute: Option<Retained<ProtocolObject<dyn MTLComputeCommandEncoder>>>,
    raw_primitive_type: MTLPrimitiveType,
    index: Option<IndexState>,
    /// Stage information for each shader stage.
    stage_infos: MultiStageData<PipelineStageInfo>,

    /// Sizes of currently bound [`wgt::BufferBindingType::Storage`] buffers.
    ///
    /// Specifically:
    ///
    /// - The keys are [`ResourceBinding`] values (that is, the WGSL `@group`
    ///   and `@binding` attributes) for `var<storage>` global variables in the
    ///   current module that contain runtime-sized arrays.
    ///
    /// - The values are the actual sizes of the buffers currently bound to
    ///   provide those globals' contents, which are needed to implement bounds
    ///   checks and the WGSL `arrayLength` function.
    ///
    /// For each stage `S` in `stage_infos`, we consult this to find the sizes
    /// of the buffers listed in `stage_infos.S.sized_bindings`, which we must
    /// pass to the entry point.
    ///
    /// See `device::CompiledShader::sized_bindings` for more details.
    ///
    /// [`ResourceBinding`]: naga::ResourceBinding
    storage_buffer_length_map: FastHashMap<(naga::ResourceBinding, u32), wgt::BufferSize>,

    // NOTE(review): presumably keyed by vertex buffer slot index — confirm.
    vertex_buffer_size_map: FastHashMap<u32, wgt::BufferSize>,

    immediates: Vec<u32>,

    /// Timer query that should be executed when the next pass starts.
    pending_timer_queries: Vec<(QuerySet, u32)>,
}
1350
// Any state in this struct that may be dirty after an abandoned encoding must
// be reset in `discard_encoding` for possible encoder reuse.
/// Command encoder for the Metal backend.
pub struct CommandEncoder {
    shared: Arc<AdapterShared>,
    queue_shared: Arc<QueueShared>,
    /// The Metal command buffer currently held by this encoder, if any.
    raw_cmd_buf: Option<Retained<ProtocolObject<dyn MTLCommandBuffer>>>,
    state: CommandState,
    temp: Temp,
    counters: Arc<wgt::HalCounters>,
}

impl fmt::Debug for CommandEncoder {
    /// Formats only the `raw_cmd_buf` field; the other fields are omitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw_cmd_buf", &self.raw_cmd_buf)
            .finish()
    }
}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// objects being usable from any thread — confirm.
unsafe impl Send for CommandEncoder {}
unsafe impl Sync for CommandEncoder {}
1372
/// A Metal command buffer plus a handle to the shared queue state.
#[derive(Debug)]
pub struct CommandBuffer {
    /// Retained Metal command buffer.
    raw: Retained<ProtocolObject<dyn MTLCommandBuffer>>,
    queue_shared: Arc<QueueShared>,
}

impl crate::DynCommandBuffer for CommandBuffer {}

// NOTE(review): manual `Send`/`Sync`; soundness depends on the wrapped Metal
// object being usable from any thread — confirm.
unsafe impl Send for CommandBuffer {}
unsafe impl Sync for CommandBuffer {}
1383
/// Pipeline cache type for this backend; it is a unit struct and holds no data.
#[derive(Debug)]
pub struct PipelineCache;

impl crate::DynPipelineCache for PipelineCache {}
1388
/// A retained `MTLAccelerationStructure`.
#[derive(Debug)]
pub struct AccelerationStructure {
    /// Retained handle to the underlying Metal acceleration structure.
    raw: Retained<ProtocolObject<dyn MTLAccelerationStructure>>,
}
1393
1394impl AccelerationStructure {
1395    fn as_raw(&self) -> NonNull<ProtocolObject<dyn MTLAccelerationStructure>> {
1396        unsafe { NonNull::new_unchecked(Retained::as_ptr(&self.raw) as *mut _) }
1397    }
1398}
1399
1400impl crate::DynAccelerationStructure for AccelerationStructure {}
1401unsafe impl Send for AccelerationStructure {}
1402unsafe impl Sync for AccelerationStructure {}
1403
/// The Apple operating system families distinguished by this backend.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OsType {
    Macos,
    Ios,
    Tvos,
    VisionOs,
}