wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
Ash expects slices, which we don't generally have available.
We cope with this requirement using a combination of the following approaches:
  - temporarily allocating a `Vec` on the heap, where the overhead is permitted
  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of their image views is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod descriptor;
31mod device;
32mod drm;
33mod instance;
34mod sampler;
35mod semaphore_list;
36mod swapchain;
37
38pub use adapter::PhysicalDeviceFeatures;
39
40use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
41use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
42
43use arrayvec::ArrayVec;
44use ash::{ext, khr, vk};
45use bytemuck::{Pod, Zeroable};
46use hashbrown::HashSet;
47use parking_lot::{Mutex, RwLock};
48
49use naga::FastHashMap;
50use wgt::InternalCounter;
51
52use semaphore_list::SemaphoreList;
53
54use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
55
/// Twice [`crate::MAX_COLOR_ATTACHMENTS`] plus one.
///
/// NOTE(review): presumably one color and one resolve attachment per color
/// slot, plus a single depth-stencil attachment — confirm against the render
/// pass / framebuffer code that uses this bound.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
57
/// Marker type that selects the Vulkan backend through its [`crate::Api`]
/// implementation.
#[derive(Clone, Debug)]
pub struct Api;
60
/// Binds every associated type of [`crate::Api`] to the Vulkan type of the
/// same name declared in this module.
impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    // Instance-level objects.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Command submission.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding model and pipelines.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
    type RayTracingPipeline = RayTracingPipeline;
}
91
// Applies `crate::impl_dyn_resource!` to every Vulkan backend type listed.
// NOTE(review): the macro body is defined elsewhere in the crate; presumably it
// implements the dynamic-dispatch resource trait for each type — confirm there.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    RayTracingPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
117
/// Debug-utils state held by [`InstanceShared`] in its `debug_utils` field.
struct DebugUtils {
    /// Instance-level function table of the debug-utils extension.
    extension: ext::debug_utils::Instance,
    /// Handle of the debug messenger.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    // Never read from Rust code: the box exists only to keep the allocation alive.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
130
/// Severity filter, message-type filter and callback user data for a debug
/// messenger.
///
/// NOTE(review): presumably consumed when the instance creates its
/// [`vk::DebugUtilsMessengerEXT`] — confirm in `instance.rs`.
#[derive(Debug)]
pub struct DebugUtilsCreateInfo {
    /// Message severities to report.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message categories to report.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data for the messenger callback.
    callback_data: Box<DebugUtilsMessengerUserData>,
}
137
#[derive(Debug)]
/// Properties of the validation layer that the debug messenger callback
/// needs in order to apply its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
148
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// Properties of the validation layer, or `None` if the layer is not
    /// present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// its layer, so there is no reason to store the version.
    has_obs_layer: bool,
}
162
/// Instance-level state, shared through an `Arc` by [`Instance`], [`Adapter`]
/// and [`DeviceShared`].
pub struct InstanceShared {
    /// The `ash` instance wrapper.
    raw: ash::Instance,
    /// Names of instance extensions.
    /// NOTE(review): presumably the extensions that were enabled — confirm.
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state; `None` when there is no messenger.
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}
185
186impl fmt::Debug for InstanceShared {
187    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188        let Self {
189            raw: _,
190            extensions,
191            flags,
192            memory_budget_thresholds,
193            debug_utils: _,
194            get_physical_device_properties: _,
195            entry: _,
196            has_nv_optimus,
197            android_sdk_version,
198            instance_api_version,
199            drop_guard: _,
200        } = self;
201        f.debug_struct("InstanceShared")
202            .field("extensions", extensions)
203            .field("flags", flags)
204            .field("memory_budget_thresholds", memory_budget_thresholds)
205            .field("has_nv_optimus", has_nv_optimus)
206            .field("android_sdk_version", android_sdk_version)
207            .field("instance_api_version", instance_api_version)
208            .finish_non_exhaustive()
209    }
210}
211
/// The Vulkan backend's instance type (see `Api::Instance`); a handle to the
/// reference-counted [`InstanceShared`].
#[derive(Debug)]
pub struct Instance {
    /// Instance state, shared via `Arc`.
    shared: Arc<InstanceShared>,
}
216
/// A presentable surface together with its swapchain, if one is configured.
///
/// Both parts are trait objects; the methods on this type downcast them to
/// `swapchain::NativeSurface` / `swapchain::NativeSwapchain` to reach the raw
/// Vulkan handles.
#[expect(missing_debug_implementations, reason = "TODO?")]
pub struct Surface {
    /// The current swapchain; `None` while the surface is not configured.
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
    /// The surface implementation.
    inner: Box<dyn swapchain::Surface>,
}
222
223impl Surface {
224    /// Returns the raw Vulkan surface handle.
225    ///
226    /// Returns `None` if the surface is a DXGI surface.
227    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
228        Some(
229            self.inner
230                .as_any()
231                .downcast_ref::<swapchain::NativeSurface>()?
232                .as_raw(),
233        )
234    }
235
236    /// Get the raw Vulkan swapchain associated with this surface.
237    ///
238    /// Returns [`None`] if the surface is not configured or if the swapchain
239    /// is a DXGI swapchain.
240    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
241        let read = self.swapchain.read();
242        Some(
243            read.as_ref()?
244                .as_any()
245                .downcast_ref::<swapchain::NativeSwapchain>()?
246                .as_raw(),
247        )
248    }
249
250    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
251    /// using [VK_GOOGLE_display_timing].
252    ///
253    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
254    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
255    ///
256    /// This can also be used to add a "not before" timestamp to the presentation.
257    ///
258    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
259    ///
260    /// # Panics
261    ///
262    /// - If the surface hasn't been configured.
263    /// - If the surface has been configured for a DXGI swapchain.
264    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
265    ///
266    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
267    #[track_caller]
268    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
269        let mut swapchain = self.swapchain.write();
270        swapchain
271            .as_mut()
272            .expect("Surface should have been configured")
273            .as_any_mut()
274            .downcast_mut::<swapchain::NativeSwapchain>()
275            .expect("Surface should have a native Vulkan swapchain")
276            .set_next_present_time(present_timing);
277    }
278}
279
/// A [`Texture`] that belongs to a surface, with additional
/// swapchain-specific metadata.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// NOTE(review): presumably the index of the image within its swapchain —
    /// confirm in `swapchain`.
    index: u32,
    /// The texture itself; this is what the `Borrow` impls hand out.
    texture: Texture,
    /// Metadata supplied by the swapchain implementation.
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}
286
// Opts `SurfaceTexture` into `crate::DynSurfaceTexture`; no items need overriding.
impl crate::DynSurfaceTexture for SurfaceTexture {}
288
/// Lets a [`SurfaceTexture`] be borrowed as the [`Texture`] it wraps.
impl Borrow<Texture> for SurfaceTexture {
    /// Returns the wrapped texture.
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}
294
/// Lets a [`SurfaceTexture`] be borrowed as a type-erased
/// [`crate::DynTexture`].
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    /// Returns the wrapped texture as a trait object.
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
300
/// A physical device together with the capabilities, features and
/// workarounds recorded for it.
#[derive(Debug)]
pub struct Adapter {
    /// The physical device handle.
    raw: vk::PhysicalDevice,
    /// The instance state, shared via `Arc`.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities that select code paths inside the backend.
    private_caps: PrivateCapabilities,
    /// Workaround flags for this adapter.
    workarounds: Workarounds,
}
313
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Where the functions of an extension are found: in a separately loaded
/// function table, or in core Vulkan after promotion.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
321
/// Device-level extension function tables.
///
/// NOTE(review): presumably a field is `None` when the corresponding
/// extension is not enabled on the device — confirm where this is built.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// Uses [`ExtensionFn`] to distinguish a loaded table from promotion to core.
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    ray_tracing_pipelines: Option<khr::ray_tracing_pipeline::Device>,
    mesh_shading: Option<ext::mesh_shader::Device>,
    // Only read on unix targets, hence the conditional `dead_code` allowance.
    #[cfg_attr(not(unix), allow(dead_code))]
    external_memory_fd: Option<khr::external_memory_fd::Device>,
}
332
/// The two extension function tables stored together in
/// `DeviceExtensionFunctions::ray_tracing`.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
337
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    multi_draw_indirect: bool,
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// This is done to panic before undefined behavior, and is imperfect.
    /// Basically, to allow implementations to emulate multiview using instancing, if you
    /// want to draw `n` instances to VR, you must draw `2n` instances, but you
    /// can never draw more than `u32::MAX` instances. Therefore, when drawing
    /// multiview on some Vulkan implementations, it might restrict the instance
    /// count, which isn't usually a thing in WebGPU. We don't expose this limit
    /// because it's strange, i.e. only occurs on certain Vulkan implementations
    /// if you are drawing more than 128 million instances. We still want to avoid
    /// undefined behavior in this situation, so we panic if the limit is violated.
    multiview_instance_index_limit: u32,

    /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
    /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
    /// these usages do not have as high of an alignment requirement as using the buffer as
    /// a scratch buffer when building acceleration structures.
    scratch_buffer_alignment: u32,

    /// `get_raytracing_pipeline_group_data` requires both a group count and a data size.
    /// The data size parameter is just this * the group count, so we store this to not
    /// require an unnecessary parameter.
    ray_tracing_pipeline_group_data_size: u32,
}
432
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// A set of these is stored on both [`Adapter`] and [`DeviceShared`].
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
466
/// Hashable description of one attachment: its format, layout and ops.
///
/// Used as a component of [`ColorAttachmentKey`] and
/// [`DepthStencilAttachmentKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
473
474impl AttachmentKey {
475    /// Returns an attachment key for a compatible attachment.
476    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
477        Self {
478            format,
479            layout,
480            ops: crate::AttachmentOps::all(),
481        }
482    }
483}
484
/// Key for one color attachment slot of a [`RenderPassKey`].
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    /// The color attachment itself.
    base: AttachmentKey,
    /// The resolve attachment, if there is one.
    resolve: Option<AttachmentKey>,
}
490
/// Key for the depth-stencil attachment of a [`RenderPassKey`].
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    /// Format, layout and ops of the attachment.
    /// NOTE(review): presumably `base.ops` are the depth ops, given the
    /// separate `stencil_ops` field — confirm.
    base: AttachmentKey,
    /// Ops for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
496
/// Key of the `DeviceShared::render_passes` map, which maps it to a
/// `vk::RenderPass`.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// One optional entry per color slot, at most `crate::MAX_COLOR_ATTACHMENTS`.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview_mask: Option<NonZeroU32>,
}
504
/// Device state shared through an `Arc` by [`Device`] and [`Queue`].
///
/// Its `Drop` implementation destroys the cached render passes, the empty
/// descriptor set layout and — when there is no drop guard — the device.
struct DeviceShared {
    /// The `ash` device wrapper.
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// The instance state, shared via `Arc`.
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render passes created so far, by key; destroyed when this struct drops.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Cached framebuffers are not removed from the cache until the device is
    /// destroyed, so if the Vulkan implementation re-uses handles we need some
    /// way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    empty_descriptor_set_layout: vk::DescriptorSetLayout,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}
538
539impl Drop for DeviceShared {
540    fn drop(&mut self) {
541        for &raw in self.render_passes.lock().values() {
542            unsafe { self.raw.destroy_render_pass(raw, None) };
543        }
544        unsafe {
545            self.raw
546                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
547        };
548        if self.drop_guard.is_none() {
549            unsafe { self.raw.destroy_device(None) };
550        }
551    }
552}
553
/// The Vulkan backend's device type (see `Api::Device`): the allocators,
/// shader-backend options and counters, plus a handle to the shared device
/// state.
#[expect(
    missing_debug_implementations,
    reason = "needs work to not be disastrously verbose"
)]
pub struct Device {
    /// Memory allocator.
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    /// Descriptor allocator.
    desc_allocator: Mutex<descriptor::DescriptorAllocator>,
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last, put the Device last to ensure that
    // all resources that depends on it are destroyed before it like the mem_allocator
    shared: Arc<DeviceShared>,
}
570
impl Drop for Device {
    /// Does nothing itself; the fields are dropped afterwards in declaration
    /// order.
    ///
    /// NOTE(review): the reason for keeping an empty `Drop` impl is not visible
    /// here. One effect is that fields cannot be moved out of a `Device` by
    /// destructuring — confirm whether that is the intent or whether allocator
    /// cleanup is missing.
    fn drop(&mut self) {}
}
574
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` only copies the raw handles; `advance` uses it to return a snapshot
// of the handles for the current submission. A clone does not own the
// semaphores: `destroy` is called on the instance held by the `Queue`.
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
608
609impl RelaySemaphores {
610    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
611        Ok(Self {
612            wait: None,
613            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
614        })
615    }
616
617    /// Advances the semaphores, returning the semaphores that should be used for a submission.
618    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
619        let old = self.clone();
620
621        // Build the state for the next submission.
622        match self.wait {
623            None => {
624                // The `old` values describe the first submission to this queue.
625                // The second submission should wait on `old.signal`, and then
626                // signal a new semaphore which we'll create now.
627                self.wait = Some(old.signal);
628                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
629            }
630            Some(ref mut wait) => {
631                // What this submission signals, the next should wait.
632                mem::swap(wait, &mut self.signal);
633            }
634        };
635
636        Ok(old)
637    }
638
639    /// Destroys the semaphores.
640    unsafe fn destroy(&self, device: &ash::Device) {
641        unsafe {
642            if let Some(wait) = self.wait {
643                device.destroy_semaphore(wait, None);
644            }
645            device.destroy_semaphore(self.signal, None);
646        }
647    }
648}
649
/// The Vulkan backend's queue type (see `Api::Queue`).
pub struct Queue {
    /// The queue handle, as returned by `as_raw`.
    raw: vk::Queue,
    /// The device state, shared via `Arc`.
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Semaphores that order consecutive submissions; destroyed when the queue
    /// is dropped.
    relay_semaphores: Mutex<RelaySemaphores>,
    /// NOTE(review): presumably extra semaphores for the next submission to
    /// signal — confirm in the submit path.
    signal_semaphores: Mutex<SemaphoreList>,
    /// NOTE(review): presumably extra semaphores for the next submission to
    /// wait on — confirm in the submit path.
    wait_semaphores: Mutex<SemaphoreList>,
}
658
659impl fmt::Debug for Queue {
660    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
661        let Self {
662            raw: _,
663            device: _,
664            family_index,
665            relay_semaphores: _,
666            signal_semaphores: _,
667            wait_semaphores: _,
668        } = self;
669        f.debug_struct("Queue")
670            .field("family_index", family_index)
671            .finish_non_exhaustive()
672    }
673}
674
impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
680
681impl Drop for Queue {
682    fn drop(&mut self) {
683        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
684    }
685}
/// The memory behind a buffer: either an allocation from the memory
/// allocator, or an explicit range of a raw `vk::DeviceMemory`.
#[derive(Debug)]
enum BufferMemoryBacking {
    /// An allocation made through `gpu_allocator`.
    Managed(gpu_allocator::vulkan::Allocation),
    /// A `size`-byte range starting at `offset` within `memory`
    /// (see `Buffer::from_raw_managed`).
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}
695impl BufferMemoryBacking {
696    fn memory(&self) -> vk::DeviceMemory {
697        match self {
698            Self::Managed(m) => unsafe { m.memory() },
699            Self::VulkanMemory { memory, .. } => *memory,
700        }
701    }
702    fn offset(&self) -> u64 {
703        match self {
704            Self::Managed(m) => m.offset(),
705            Self::VulkanMemory { offset, .. } => *offset,
706        }
707    }
708    fn size(&self) -> u64 {
709        match self {
710            Self::Managed(m) => m.size(),
711            Self::VulkanMemory { size, .. } => *size,
712        }
713    }
714}
/// Describes who owns a [`Buffer`]'s `vk::Buffer` handle and its backing memory,
/// and therefore what cleanup is required when the buffer is destroyed.
///
/// The three variants correspond to the constructors
/// `Buffer::from_raw_managed`, `Buffer::from_raw` and
/// `Buffer::from_raw_externally_owned` respectively.
#[derive(Debug)]
enum BufferOwnership {
    /// wgpu-hal owns the `vk::Buffer` and its backing memory. On cleanup the buffer
    /// handle is destroyed and the memory is released.
    Managed(Mutex<BufferMemoryBacking>),
    /// wgpu-hal owns the `vk::Buffer` handle but the backing memory is kept alive
    /// by the caller. On cleanup only the buffer handle is destroyed.
    RawHandle,
    /// Caller owns the `vk::Buffer` and its backing memory. On cleanup the
    /// [`crate::DropGuard`] runs the caller's cleanup callback and wgpu-hal touches
    /// neither the handle nor the memory.
    External(crate::DropGuard),
}
730
/// The Vulkan backend's buffer type (see `Api::Buffer`): a `vk::Buffer` handle
/// plus a record of who owns it and its memory.
#[derive(Debug)]
pub struct Buffer {
    /// The buffer handle.
    raw: vk::Buffer,

    // This field must be last, because it may contain a `DropGuard` which needs to be dropped after all other fields.
    ownership: BufferOwnership,
}
738impl Buffer {
739    /// # Safety
740    ///
741    /// - `vk_buffer`'s memory must be managed by the caller
742    /// - Externally imported buffers can't be mapped by `wgpu`
743    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
744        Self {
745            raw: vk_buffer,
746            ownership: BufferOwnership::RawHandle,
747        }
748    }
749
750    /// # Safety
751    /// - `vk_buffer` must outlive the returned `Buffer`.
752    /// - wgpu-hal will NOT call `vkDestroyBuffer`; the caller remains responsible for the buffer handle's destruction.
753    ///   The `drop_callback` runs when the `Buffer` drops and may be used to release caller-side bookkeeping.
754    /// - Externally imported buffers can't be mapped by `wgpu`.
755    pub unsafe fn from_raw_externally_owned(
756        vk_buffer: vk::Buffer,
757        drop_callback: crate::DropCallback,
758    ) -> Self {
759        Self {
760            raw: vk_buffer,
761            ownership: BufferOwnership::External(crate::DropGuard::new(drop_callback)),
762        }
763    }
764
765    /// # Safety
766    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
767    /// - Externally imported buffers can't be mapped by `wgpu`
768    /// - `offset` and `size` must be valid with the allocation of `memory`
769    pub unsafe fn from_raw_managed(
770        vk_buffer: vk::Buffer,
771        memory: vk::DeviceMemory,
772        offset: u64,
773        size: u64,
774    ) -> Self {
775        Self {
776            raw: vk_buffer,
777            ownership: BufferOwnership::Managed(Mutex::new(BufferMemoryBacking::VulkanMemory {
778                memory,
779                offset,
780                size,
781            })),
782        }
783    }
784
785    /// # Safety
786    /// - The buffer handle must not be manually destroyed
787    pub unsafe fn raw_handle(&self) -> vk::Buffer {
788        self.raw
789    }
790}
791
// Opts `Buffer` into `crate::DynBuffer`; no items need overriding.
impl crate::DynBuffer for Buffer {}
793
/// The Vulkan backend's acceleration structure type (see
/// `Api::AccelerationStructure`).
#[derive(Debug)]
pub struct AccelerationStructure {
    /// The acceleration structure handle.
    raw: vk::AccelerationStructureKHR,
    /// NOTE(review): presumably the buffer that stores the acceleration
    /// structure — confirm at the creation site.
    buffer: vk::Buffer,
    /// NOTE(review): presumably the memory bound to `buffer` — confirm.
    allocation: gpu_allocator::vulkan::Allocation,
    /// NOTE(review): presumably a query pool for reading back the compacted
    /// size, present only when needed — confirm.
    compacted_size_query: Option<vk::QueryPool>,
}
801
// Opts `AccelerationStructure` into `crate::DynAccelerationStructure`; no items need overriding.
impl crate::DynAccelerationStructure for AccelerationStructure {}
803
/// How the memory behind a [`Texture`] is provided and who owns it.
#[derive(Debug)]
pub enum TextureMemory {
    /// Shared memory in the GPU allocator (owned by wgpu-hal).
    Allocation(gpu_allocator::vulkan::Allocation),

    /// Dedicated memory (owned by wgpu-hal).
    Dedicated(vk::DeviceMemory),

    /// Memory not owned by wgpu.
    External,
}
815
/// The Vulkan backend's texture type (see `Api::Texture`): a `vk::Image` with
/// its memory and descriptive metadata.
#[derive(Debug)]
pub struct Texture {
    /// The image handle.
    raw: vk::Image,
    /// The memory behind the image.
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Identity value for this texture; see `DeviceShared::texture_identity_factory`.
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}
828
// Opts `Texture` into `crate::DynTexture`; no items need overriding.
impl crate::DynTexture for Texture {}
830
impl Texture {
    /// Returns the raw `vk::Image` handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// Returns a reference to the [`TextureMemory`] behind this texture.
    ///
    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or
    ///   `gpu_allocator::vulkan::Allocation` in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}
847
848#[derive(Debug)]
849pub struct TextureView {
850    raw_texture: vk::Image,
851    raw: vk::ImageView,
852    _layers: NonZeroU32,
853    format: wgt::TextureFormat,
854    raw_format: vk::Format,
855    base_mip_level: u32,
856    dimension: wgt::TextureViewDimension,
857    texture_identity: ResourceIdentity<vk::Image>,
858    view_identity: ResourceIdentity<vk::ImageView>,
859}
860
861impl crate::DynTextureView for TextureView {}
862
863impl TextureView {
864    /// # Safety
865    ///
866    /// - The image view handle must not be manually destroyed
867    pub unsafe fn raw_handle(&self) -> vk::ImageView {
868        self.raw
869    }
870
871    /// Returns the raw texture view, along with its identity.
872    fn identified_raw_view(&self) -> IdentifiedTextureView {
873        IdentifiedTextureView {
874            raw: self.raw,
875            identity: self.view_identity,
876        }
877    }
878}
879
/// Vulkan backend sampler: a raw `vk::Sampler` kept together with a
/// `vk::SamplerCreateInfo`.
#[derive(Debug)]
pub struct Sampler {
    /// The raw sampler handle.
    raw: vk::Sampler,
    /// Create info stored next to the handle
    /// (presumably the one `raw` was created from — TODO confirm).
    create_info: vk::SamplerCreateInfo<'static>,
}

// Empty impl body: nothing is overridden for `Sampler`.
impl crate::DynSampler for Sampler {}
887
/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// Binding index within the group.
    binding: u32,
    /// Array size of the binding, when it has one.
    binding_array_size: Option<NonZeroU32>,
}
896
/// Vulkan backend bind group layout: a `vk::DescriptorSetLayout` plus the
/// bookkeeping stored alongside it.
#[derive(Debug)]
pub struct BindGroupLayout {
    /// The raw descriptor set layout handle.
    raw: vk::DescriptorSetLayout,
    /// Descriptor counts for this layout (see `descriptor::DescriptorCounts`).
    desc_count: descriptor::DescriptorCounts,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    /// NOTE(review): by name, whether any entry is a binding array — confirm at the
    /// construction site.
    contains_binding_arrays: bool,
}

// Empty impl body: nothing is overridden for `BindGroupLayout`.
impl crate::DynBindGroupLayout for BindGroupLayout {}
910
/// Vulkan backend pipeline layout: a `vk::PipelineLayout` and a naga SPIR-V
/// binding map.
#[derive(Debug)]
pub struct PipelineLayout {
    /// The raw pipeline layout handle.
    raw: vk::PipelineLayout,
    /// Binding map in the form expected by naga's SPIR-V backend.
    binding_map: naga::back::spv::BindingMap,
}

// Empty impl body: nothing is overridden for `PipelineLayout`.
impl crate::DynPipelineLayout for PipelineLayout {}
918
/// Vulkan backend bind group, wrapping a single `descriptor::DescriptorSet`.
#[derive(Debug)]
pub struct BindGroup {
    /// The descriptor set backing this bind group.
    set: descriptor::DescriptorSet,
}

// Empty impl body: nothing is overridden for `BindGroup`.
impl crate::DynBindGroup for BindGroup {}
925
926/// Miscellaneous allocation recycling pool for `CommandAllocator`.
927#[derive(Default)]
928struct Temp {
929    marker: Vec<u8>,
930    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
931    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
932}
933
934impl Temp {
935    fn clear(&mut self) {
936        self.marker.clear();
937        self.buffer_barriers.clear();
938        self.image_barriers.clear();
939    }
940
941    fn make_c_str(&mut self, name: &str) -> &CStr {
942        self.marker.clear();
943        self.marker.extend_from_slice(name.as_bytes());
944        self.marker.push(0);
945        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
946    }
947}
948
/// Hands out a fresh ID for every resource of type `T`.
///
/// Vulkan handles are not permanently unique, so the IDs produced here are
/// what distinguishes one resource from another.
struct ResourceIdentityFactory<T> {
    /// Next ID to hand out (mutex-protected where 64-bit atomics are missing).
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// Next ID to hand out.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first ID is `0`.
    fn new() -> Self {
        #[cfg(not(target_has_atomic = "64"))]
        let next_id = Mutex::new(0);
        #[cfg(target_has_atomic = "64")]
        let next_id = core::sync::atomic::AtomicU64::new(0);

        Self {
            next_id,
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    ///
    /// Each call yields the current counter value and advances it by one.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let current = *counter;
            *counter += 1;
            current
        };

        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// Identifier for one resource of type `T`, produced by
/// [`ResourceIdentityFactory`].
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
1004
1005#[derive(Clone, Eq, Hash, PartialEq)]
1006struct FramebufferKey {
1007    raw_pass: vk::RenderPass,
1008    /// Because this is used as a key in a hash map, we need to include the identity
1009    /// so that this hashes differently, even if the ImageView handles are the same
1010    /// between different views.
1011    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
1012    /// While this is redundant for calculating the hash, we need access to an array
1013    /// of all the raw ImageViews when we are creating the actual framebuffer,
1014    /// so we store this here.
1015    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
1016    extent: wgt::Extent3d,
1017}
1018
1019impl FramebufferKey {
1020    fn push_view(&mut self, view: IdentifiedTextureView) {
1021        self.attachment_identities.push(view.identity);
1022        self.attachment_views.push(view.raw);
1023    }
1024}
1025
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw image view handle.
    raw: vk::ImageView,
    /// Unique identity of `raw`.
    identity: ResourceIdentity<vk::ImageView>,
}
1032
/// Hash-map key for the temporary texture views cached on a [`CommandEncoder`]
/// (see its `temp_texture_views` field).
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// Raw handle of the image the view targets.
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Vulkan format of the view.
    format: vk::Format,
    /// Mip level of the view.
    mip_level: u32,
    /// Depth slice of the view.
    depth_slice: u32,
}
1044
/// Vulkan backend command encoder, built around a `vk::CommandPool`.
///
/// Dropping the encoder destroys the pool along with the cached framebuffers
/// and temporary image views (see the `Drop` impl).
// Any state in this struct that may be dirty after an abandoned encoding must
// be reset for reused encoders in `begin_encoding`.
pub struct CommandEncoder {
    /// The command pool owned by this encoder.
    raw: vk::CommandPool,
    /// Shared device state used to issue Vulkan calls.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers cached by key; destroyed when the encoder is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views cached by key; destroyed when the encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// HAL counters; `command_encoders` is decremented when the encoder is dropped.
    counters: Arc<wgt::HalCounters>,

    /// NOTE(review): by name, whether the currently bound pipeline is multiview —
    /// confirm where it is set.
    current_pipeline_is_multiview: bool,
}
1090
1091impl Drop for CommandEncoder {
1092    fn drop(&mut self) {
1093        // SAFETY:
1094        //
1095        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1096        // `CommandBuffer` must live until its execution is complete, and that a
1097        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1098        // Thus, we know that none of our `CommandBuffers` are in the "pending"
1099        // state.
1100        //
1101        // The other VUIDs are pretty obvious.
1102        unsafe {
1103            // `vkDestroyCommandPool` also frees any command buffers allocated
1104            // from that pool, so there's no need to explicitly call
1105            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1106            // fields.
1107            self.device.raw.destroy_command_pool(self.raw, None);
1108        }
1109
1110        for (_, fb) in self.framebuffers.drain() {
1111            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1112        }
1113
1114        for (_, view) in self.temp_texture_views.drain() {
1115            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1116        }
1117
1118        self.counters.command_encoders.sub(1);
1119    }
1120}
1121
impl CommandEncoder {
    /// Returns the handle stored in `active`, i.e. the current command buffer.
    ///
    /// Per the `active` field docs the handle is only meaningful while the
    /// encoder is recording (presumably null otherwise — TODO confirm).
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1130
1131impl fmt::Debug for CommandEncoder {
1132    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1133        f.debug_struct("CommandEncoder")
1134            .field("raw", &self.raw)
1135            .finish()
1136    }
1137}
1138
/// Vulkan backend command buffer, wrapping a raw `vk::CommandBuffer`.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw command buffer handle.
    raw: vk::CommandBuffer,
}

// Empty impl body: nothing is overridden for `CommandBuffer`.
impl crate::DynCommandBuffer for CommandBuffer {}
1145
/// Vulkan backend shader module, in one of two forms.
#[derive(Debug)]
pub enum ShaderModule {
    /// An existing raw `vk::ShaderModule`.
    Raw(vk::ShaderModule),
    /// A naga shader kept with its runtime-check settings
    /// (presumably translated to SPIR-V later — TODO confirm).
    Intermediate {
        /// The naga shader.
        naga_shader: crate::NagaShader,
        /// Runtime checks associated with the shader.
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

// Empty impl body: nothing is overridden for `ShaderModule`.
impl crate::DynShaderModule for ShaderModule {}
1156
/// Vulkan backend render pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw pipeline handle.
    raw: vk::Pipeline,
    /// NOTE(review): by name, whether the pipeline was created for multiview — confirm.
    is_multiview: bool,
}

// Empty impl body: nothing is overridden for `RenderPipeline`.
impl crate::DynRenderPipeline for RenderPipeline {}
1164
/// Vulkan backend compute pipeline, wrapping a raw `vk::Pipeline`.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw pipeline handle.
    raw: vk::Pipeline,
}

// Empty impl body: nothing is overridden for `ComputePipeline`.
impl crate::DynComputePipeline for ComputePipeline {}
1171
/// Vulkan backend ray tracing pipeline, wrapping a raw `vk::Pipeline`.
#[derive(Debug)]
pub struct RayTracingPipeline {
    /// The raw pipeline handle.
    raw: vk::Pipeline,
}

// Empty impl body: nothing is overridden for `RayTracingPipeline`.
impl crate::DynRayTracingPipeline for RayTracingPipeline {}
1178
/// Vulkan backend pipeline cache, wrapping a raw `vk::PipelineCache`.
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw pipeline cache handle.
    raw: vk::PipelineCache,
}

// Empty impl body: nothing is overridden for `PipelineCache`.
impl crate::DynPipelineCache for PipelineCache {}
1185
/// Vulkan backend query set, wrapping a raw `vk::QueryPool`.
#[derive(Debug)]
pub struct QuerySet {
    /// The raw query pool handle.
    raw: vk::QueryPool,
}

// Empty impl body: nothing is overridden for `QuerySet`.
impl crate::DynQuerySet for QuerySet {}
1192
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// One should keep the fence pool read-locked while there are any
    /// references to the fences inside of it. This ensures there are no race
    /// conditions when resetting the fences.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool(RwLock<FencePool>),
}
1240
/// A shared fence type. The `Arc` is expected to have a ref-count of one once a
/// function has finished being called.
///
/// Access to a fence should be synchronised, as fence resetting might happen at
/// any point. Resetting checks the ref-count of the fence, so instead of copying
/// the fence, clone its `Arc` container: the extra reference signals that the
/// fence is in use and must not be reset.
pub(super) type SynchronizedFence = Arc<vk::Fence>;
1247
/// State behind [`Fence::FencePool`]: the completed value plus the fences
/// that are in flight or available for reuse.
#[derive(Debug)]
pub struct FencePool {
    /// Highest fence value already recorded as completed.
    last_completed: crate::FenceValue,
    /// The pending fence values have to be ascending.
    active: Vec<(crate::FenceValue, SynchronizedFence)>,
    // Don't need extra synchronisation around the fences here, if they are used they should be put into active.
    /// Fences that are not in use and can be handed out again.
    free: Vec<vk::Fence>,
}
1256
1257impl crate::DynFence for Fence {}
1258
1259impl Fence {
1260    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1261    ///
1262    /// As an optimization, assume that we already know that the fence has
1263    /// reached `last_completed`, and don't bother checking fences whose values
1264    /// are less than that: those fences remain in the `active` array only
1265    /// because we haven't called `maintain` yet to clean them up.
1266    ///
1267    /// [`FenceValue`]: crate::FenceValue
1268    fn check_active(
1269        device: &ash::Device,
1270        mut last_completed: crate::FenceValue,
1271        active: &[(crate::FenceValue, SynchronizedFence)],
1272    ) -> Result<crate::FenceValue, crate::DeviceError> {
1273        for &(value, ref raw) in active.iter() {
1274            unsafe {
1275                if value > last_completed
1276                    && device
1277                        // Don't need to clone as active should be from a read or
1278                        // write lock which means this is already synchronised.
1279                        .get_fence_status(**raw)
1280                        .map_err(map_host_device_oom_and_lost_err)?
1281                {
1282                    last_completed = value;
1283                }
1284            }
1285        }
1286        Ok(last_completed)
1287    }
1288
1289    /// Return the highest signalled [`FenceValue`] for `self`.
1290    ///
1291    /// [`FenceValue`]: crate::FenceValue
1292    fn get_latest(
1293        &self,
1294        device: &ash::Device,
1295        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1296    ) -> Result<crate::FenceValue, crate::DeviceError> {
1297        match *self {
1298            Self::TimelineSemaphore(raw) => unsafe {
1299                Ok(match *extension.unwrap() {
1300                    ExtensionFn::Extension(ref ext) => ext
1301                        .get_semaphore_counter_value(raw)
1302                        .map_err(map_host_device_oom_and_lost_err)?,
1303                    ExtensionFn::Promoted => device
1304                        .get_semaphore_counter_value(raw)
1305                        .map_err(map_host_device_oom_and_lost_err)?,
1306                })
1307            },
1308            Self::FencePool(ref pool) => {
1309                let FencePool {
1310                    last_completed,
1311                    ref active,
1312                    free: _,
1313                } = *pool.read();
1314                Self::check_active(device, last_completed, active)
1315            }
1316        }
1317    }
1318
1319    /// Trim the internal state of this [`Fence`].
1320    ///
1321    /// This function has no externally visible effect, but you should call it
1322    /// periodically to keep this fence's resource consumption under control.
1323    ///
1324    /// For fences using the [`FencePool`] implementation, this function
1325    /// recycles fences that have been signaled. If you don't call this,
1326    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1327    /// time it's called.
1328    ///
1329    /// [`FencePool`]: Fence::FencePool
1330    /// [`Queue::submit`]: crate::Queue::submit
1331    fn maintain(&self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1332        match *self {
1333            Self::TimelineSemaphore(_) => {}
1334            Self::FencePool(ref pool) => {
1335                let FencePool {
1336                    ref mut last_completed,
1337                    ref mut active,
1338                    ref mut free,
1339                } = *pool.write();
1340
1341                let base_free = free.len();
1342                let latest = Self::check_active(device, *last_completed, active)?;
1343
1344                active.retain_mut(|&mut (value, ref mut fence)| {
1345                    if value > latest {
1346                        true
1347                    } else if let Some(fence) = Arc::get_mut(fence) {
1348                        // No other references to these, so we have exclusive access. Add them to free and reset them later,
1349                        // but drop them from active immediately
1350                        free.push(*fence);
1351                        false
1352                    } else {
1353                        // some other function is using it. Although this shouldn't be to long,
1354                        // maintain shouldn't block, and it should be cleared up by the next time it happens
1355                        true
1356                    }
1357                });
1358
1359                if free.len() != base_free {
1360                    unsafe { device.reset_fences(&free[base_free..]) }
1361                        .map_err(map_device_oom_err)?
1362                }
1363                *last_completed = latest;
1364            }
1365        }
1366        Ok(())
1367    }
1368}
1369
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` to the Vulkan queue with one `vkQueueSubmit` call.
    ///
    /// The wait and signal semaphore lists are assembled from, in order:
    /// the swapchain semaphores of every entry in `surface_textures`, the
    /// semaphores staged on this queue (`signal_semaphores` /
    /// `wait_semaphores`), the relay semaphore pair, and finally
    /// `signal_fence`. For a timeline-semaphore fence, `signal_value` is
    /// signalled on that semaphore; for a fence pool, a `vk::Fence` is taken
    /// from the pool (or created) and recorded under `signal_value`.
    ///
    /// # Errors
    ///
    /// Returns a `DeviceError` if obtaining semaphores, maintaining the fence,
    /// creating a fence, or the submission itself fails.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless `signal_fence` is a fence pool.
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Acquire every swapchain semaphore guard up front, before using any of them.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        // Fold in the signal semaphores staged via `Queue::add_signal_semaphore`.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // Fold in the wait semaphores staged via `Queue::add_wait_semaphore`.
        let mut wait_guard = self.wait_semaphores.lock();
        if !wait_guard.is_empty() {
            wait_semaphores.append(&mut wait_guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        // Keeping the Arc around is probably unneeded - the fence should never be signaled as it was reset,
        // and newer submits should not happen until this submit is done. Therefore, it should be too high
        // to be reset.
        let shared_fence;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool(ref pool) => {
                let FencePool {
                    ref mut active,
                    ref mut free,
                    ..
                } = *pool.write();
                // Reuse a recycled fence when one is available, otherwise create a new one.
                shared_fence = match free.pop() {
                    Some(raw) => Arc::new(raw),
                    None => unsafe {
                        let fence = self
                            .device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?;
                        Arc::new(fence)
                    },
                };
                fence_raw = *shared_fence;
                active.push((signal_value, shared_fence.clone()));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        // Storage handed to `add_to_submit` for the timeline-semaphore submit info.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` through the swapchain currently stored on `surface`.
    ///
    /// Takes the swapchain write lock for the duration of the call.
    ///
    /// # Panics
    ///
    /// Panics if `surface` has no swapchain.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    /// Returns the `timestamp_period` value stored on the device.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }

    /// Blocks until the queue is idle (`vkQueueWaitIdle`), mapping any failure
    /// through `map_host_device_oom_and_lost_err`.
    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}
1518
1519impl Queue {
1520    pub fn raw_device(&self) -> &ash::Device {
1521        &self.device.raw
1522    }
1523
1524    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1525        let mut guard = self.signal_semaphores.lock();
1526        if let Some(value) = semaphore_value {
1527            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1528        } else {
1529            guard.push_signal(SemaphoreType::Binary(semaphore));
1530        }
1531    }
1532
1533    /// Remove `semaphore` from the pending signal list if it is still present.
1534    ///
1535    /// Returns `true` if the semaphore was found and removed. If the submit
1536    /// already consumed it, this is a harmless no-op that returns `false`.
1537    pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1538        self.signal_semaphores.lock().remove(semaphore)
1539    }
1540
1541    /// Stage a semaphore wait on the next [`crate::Queue::submit`] call.
1542    ///
1543    /// `semaphore_value` selects the kind of payload the wait targets:
1544    ///
1545    /// - `Some(value)` - wait until `semaphore` (a timeline semaphore) has been signalled to at least `value`.
1546    /// - `None` - wait on a binary semaphore signal.
1547    ///
1548    /// `stage` is the pipeline stage at which the wait blocks downstream
1549    /// work (e.g. `vk::PipelineStageFlags::TOP_OF_PIPE` to gate the
1550    /// entire submission, or a more specific stage when only that stage
1551    /// reads the synchronised resource).
1552    pub fn add_wait_semaphore(
1553        &self,
1554        semaphore: vk::Semaphore,
1555        semaphore_value: Option<u64>,
1556        stage: vk::PipelineStageFlags,
1557    ) {
1558        let mut guard = self.wait_semaphores.lock();
1559        if let Some(value) = semaphore_value {
1560            guard.push_wait(SemaphoreType::Timeline(semaphore, value), stage);
1561        } else {
1562            guard.push_wait(SemaphoreType::Binary(semaphore), stage);
1563        }
1564    }
1565
1566    /// Remove `semaphore` from the pending wait list if it is still present.
1567    ///
1568    /// Returns `true` if the semaphore was found and removed. If the submit
1569    /// already consumed it, this is a no-op that returns `false`.
1570    pub fn remove_wait_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1571        self.wait_semaphores.lock().remove(semaphore)
1572    }
1573}
1574
1575/// Maps
1576///
1577/// - VK_ERROR_OUT_OF_HOST_MEMORY
1578/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1579fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1580    match err {
1581        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1582            get_oom_err(err)
1583        }
1584        e => get_unexpected_err(e),
1585    }
1586}
1587
1588/// Maps
1589///
1590/// - VK_ERROR_OUT_OF_HOST_MEMORY
1591/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1592/// - VK_ERROR_DEVICE_LOST
1593fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1594    match err {
1595        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1596        other => map_host_device_oom_err(other),
1597    }
1598}
1599
1600/// Maps
1601///
1602/// - VK_ERROR_OUT_OF_HOST_MEMORY
1603/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1604/// - VK_ERROR_FRAGMENTATION
1605fn map_host_device_oom_and_fragmentation_err(err: vk::Result) -> crate::DeviceError {
1606    match err {
1607        vk::Result::ERROR_FRAGMENTATION => get_oom_err(err),
1608        other => map_host_device_oom_err(other),
1609    }
1610}
1611
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The opaque-capture-address code gets no dedicated handling: the error is
/// passed straight to `map_host_device_oom_err`.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1622
1623/// Maps
1624///
1625/// - VK_ERROR_OUT_OF_HOST_MEMORY
1626fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1627    match err {
1628        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1629        e => get_unexpected_err(e),
1630    }
1631}
1632
1633/// Maps
1634///
1635/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1636fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1637    match err {
1638        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1639        e => get_unexpected_err(e),
1640    }
1641}
1642
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The opaque-capture-address code gets no dedicated handling: the error is
/// passed straight to `map_host_oom_err`.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1652
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// Only the two out-of-memory codes get dedicated handling: the error is
/// passed straight to `map_host_device_oom_err`.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1666
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
///
/// `_err` is only used in the panic message.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled the panic above makes this expression unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1676
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// `_err` is accepted but not inspected.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1681
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled the panic above makes this expression unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1691
1692#[derive(Clone, Copy, Pod, Zeroable)]
1693#[repr(C)]
1694struct RawTlasInstance {
1695    transform: [f32; 12],
1696    custom_data_and_mask: u32,
1697    shader_binding_table_record_offset_and_flags: u32,
1698    acceleration_structure_reference: u64,
1699}
1700
1701/// Arguments to the [`CreateDeviceCallback`].
1702#[derive(Debug)]
1703pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
1704where
1705    'this: 'pnext,
1706{
1707    /// The extensions to enable for the device. You must not remove anything from this list,
1708    /// but you may add to it.
1709    pub extensions: &'arg mut Vec<&'static CStr>,
1710    /// The physical device features to enable. You may enable features, but must not disable any.
1711    pub device_features: &'arg mut PhysicalDeviceFeatures,
1712    /// The queue create infos for the device. You may add or modify queue create infos as needed.
1713    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
1714    /// The create info for the device. You may add or modify things in the pnext chain, but
1715    /// do not turn features off. Additionally, do not add things to the list of extensions,
1716    /// or to the feature set, as all changes to that member will be overwritten.
1717    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
1718    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1719    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1720    /// don't actually directly use `'this`
1721    _phantom: PhantomData<&'this ()>,
1722}
1723
1724/// Callback to allow changing the vulkan device creation parameters.
1725///
1726/// # Safety:
1727/// - If you want to add extensions, add the to the `Vec<'static CStr>` not the create info,
1728///   as the create info value will be overwritten.
1729/// - Callback must not remove features.
1730/// - Callback must not change anything to what the instance does not support.
1731pub type CreateDeviceCallback<'this> =
1732    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1733
1734/// Arguments to the [`CreateInstanceCallback`].
1735#[expect(missing_debug_implementations, reason = "TODO?")]
1736pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
1737where
1738    'this: 'pnext,
1739{
1740    /// The extensions to enable for the instance. You must not remove anything from this list,
1741    /// but you may add to it.
1742    pub extensions: &'arg mut Vec<&'static CStr>,
1743    /// The create info for the instance. You may add or modify things in the pnext chain, but
1744    /// do not turn features off. Additionally, do not add things to the list of extensions,
1745    /// all changes to that member will be overwritten.
1746    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
1747    /// Vulkan entry point.
1748    pub entry: &'arg ash::Entry,
1749    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1750    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1751    /// don't actually directly use `'this`
1752    _phantom: PhantomData<&'this ()>,
1753}
1754
1755/// Callback to allow changing the vulkan instance creation parameters.
1756///
1757/// # Safety:
1758/// - If you want to add extensions, add the to the `Vec<'static CStr>` not the create info,
1759///   as the create info value will be overwritten.
1760/// - Callback must not remove features.
1761/// - Callback must not change anything to what the instance does not support.
1762pub type CreateInstanceCallback<'this> =
1763    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
wgpu_hal/vulkan/mod.rs

wgpu_hal/vulkan/
mod.rs