wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
  - temporarily allocating a `Vec` on the heap, where the overhead is permitted
  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but a framebuffer is removed
when any of the image views it uses gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Number of nanoseconds in one millisecond.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Twice [`crate::MAX_COLOR_ATTACHMENTS`], plus one.
///
/// NOTE(review): presumably one color and one resolve attachment per color
/// slot, plus a single depth/stencil attachment — confirm against the users
/// of this constant.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// Unit type that identifies the Vulkan backend.
///
/// It carries no data; it exists so that [`crate::Api`] can be implemented
/// on it.
#[derive(Clone, Debug)]
pub struct Api;
57
// Binds every associated type of `crate::Api` to the identically named type
// in this module.
impl crate::Api for Api {
    // Entry points: instance, surface, adapter and device.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Command submission.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding model, shaders and pipelines.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
85
// Invoke `crate::impl_dyn_resource!` for every backend type listed below.
// NOTE(review): presumably this implements the dynamic-dispatch resource
// trait for each listed type — confirm against the macro definition.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
110
/// A debug utils messenger together with everything needed to keep it alive.
struct DebugUtils {
    /// Instance-level function table of the debug utils extension.
    extension: ext::debug_utils::Instance,
    /// Handle of the messenger itself.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    // Never read from Rust: the field exists only to own the allocation.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
123
/// Settings for a debug utils messenger.
///
/// NOTE(review): presumably consumed when the instance creates its
/// [`vk::DebugUtilsMessengerEXT`] — confirm in the `instance` module.
pub struct DebugUtilsCreateInfo {
    /// Message severities selected for the messenger.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types selected for the messenger.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// Boxed user data for the messenger callback.
    callback_data: Box<DebugUtilsMessengerUserData>,
}
129
#[derive(Debug)]
/// Properties of the validation layer that the debug utils messenger
/// callback needs in order to apply its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
140
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties of the validation layer, or `None` if that layer is
    /// not present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// their layer, so there's no reason to record the version.
    has_obs_layer: bool,
}
154
/// Instance state held behind an `Arc` by [`Instance`], [`Surface`],
/// [`Adapter`] and `DeviceShared`.
pub struct InstanceShared {
    /// The `ash` instance wrapper.
    raw: ash::Instance,
    /// Names of instance extensions.
    /// NOTE(review): presumably the extensions that were enabled at instance
    /// creation — confirm.
    extensions: Vec<&'static CStr>,
    /// NOTE(review): presumably `Some` when the raw instance is owned by
    /// someone else and must not be destroyed here — confirm against
    /// `InstanceShared::drop`.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if a messenger was created.
    debug_utils: Option<DebugUtils>,
    /// Function table of `VK_KHR_get_physical_device_properties2`, if loaded.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
174
/// The Vulkan backend's instance type: a handle to the reference-counted
/// [`InstanceShared`].
pub struct Instance {
    /// The shared instance state.
    shared: Arc<InstanceShared>,
}
178
/// The semaphores needed to use one image in a swapchain.
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of leading semaphores in [`present`] that have been handed
    /// out to submissions in the current acquire/draw/present cycle.
    ///
    /// Each call to `get_submit_signal_semaphore` increments this, and
    /// `get_present_wait_semaphores` resets it to zero.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}
264
265impl SwapchainImageSemaphores {
266    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
267        Ok(Self {
268            acquire: device.new_binary_semaphore()?,
269            should_wait_for_acquire: true,
270            present: Vec::new(),
271            present_index: 0,
272            previously_used_submission_index: 0,
273        })
274    }
275
276    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
277        self.previously_used_submission_index = value;
278    }
279
280    /// Return the semaphore that commands drawing to this image should wait for, if any.
281    ///
282    /// This only returns `Some` once per acquisition; see
283    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
284    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
285        if self.should_wait_for_acquire {
286            self.should_wait_for_acquire = false;
287            Some(self.acquire)
288        } else {
289            None
290        }
291    }
292
293    /// Return a semaphore that a submission that writes to this image should
294    /// signal when it's done.
295    ///
296    /// See [`SwapchainImageSemaphores::present`] for details.
297    fn get_submit_signal_semaphore(
298        &mut self,
299        device: &DeviceShared,
300    ) -> Result<vk::Semaphore, crate::DeviceError> {
301        // Try to recycle a semaphore we created for a previous presentation.
302        let sem = match self.present.get(self.present_index) {
303            Some(sem) => *sem,
304            None => {
305                let sem = device.new_binary_semaphore()?;
306                self.present.push(sem);
307                sem
308            }
309        };
310
311        self.present_index += 1;
312
313        Ok(sem)
314    }
315
316    /// Return the semaphores that a presentation of this image should wait on.
317    ///
318    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
319    /// ends this image's acquisition should wait for. See
320    /// [`SwapchainImageSemaphores::present`] for details.
321    ///
322    /// Reset `self` to be ready for the next acquisition cycle.
323    ///
324    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
325    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
326        let old_index = self.present_index;
327
328        // Since this marks the end of this acquire/draw/present cycle, take the
329        // opportunity to reset `self` in preparation for the next acquisition.
330        self.present_index = 0;
331        self.should_wait_for_acquire = true;
332
333        &self.present[0..old_index]
334    }
335
336    unsafe fn destroy(&self, device: &ash::Device) {
337        unsafe {
338            device.destroy_semaphore(self.acquire, None);
339            for sem in &self.present {
340                device.destroy_semaphore(*sem, None);
341            }
342        }
343    }
344}
345
/// A configured swapchain together with its per-image synchronization state.
struct Swapchain {
    /// The swapchain handle.
    raw: vk::SwapchainKHR,
    /// Device-level function table of the swapchain extension.
    functor: khr::swapchain::Device,
    /// The shared state of the device this swapchain is tied to.
    device: Arc<DeviceShared>,
    /// Image handles of this swapchain.
    images: Vec<vk::Image>,
    /// The surface configuration stored with this swapchain.
    config: crate::SurfaceConfiguration,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to specify the semaphore as an argument
    /// to the acquire_next_image function which is what tells us which image to use.
    next_semaphore_index: usize,
    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
370
371impl Swapchain {
372    fn advance_surface_semaphores(&mut self) {
373        let semaphore_count = self.surface_semaphores.len();
374        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
375    }
376
377    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
378        self.surface_semaphores[self.next_semaphore_index].clone()
379    }
380}
381
/// A Vulkan surface and, once configured, its swapchain.
pub struct Surface {
    /// The surface handle.
    raw: vk::SurfaceKHR,
    /// Instance-level function table of the surface extension.
    functor: khr::surface::Instance,
    /// The shared state of the instance this surface is tied to.
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` while the surface is not configured.
    swapchain: RwLock<Option<Swapchain>>,
}
388
389impl Surface {
390    /// Get the raw Vulkan swapchain associated with this surface.
391    ///
392    /// Returns [`None`] if the surface is not configured.
393    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
394        let read = self.swapchain.read();
395        read.as_ref().map(|it| it.raw)
396    }
397
398    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
399    /// using [VK_GOOGLE_display_timing].
400    ///
401    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
402    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
403    ///
404    /// This can also be used to add a "not before" timestamp to the presentation.
405    ///
406    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
407    ///
408    /// # Panics
409    ///
410    /// - If the surface hasn't been configured.
411    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
412    ///
413    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
414    #[track_caller]
415    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
416        let mut swapchain = self.swapchain.write();
417        let swapchain = swapchain
418            .as_mut()
419            .expect("Surface should have been configured");
420        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
421        if swapchain.device.features.contains(features) {
422            swapchain.next_present_time = Some(present_timing);
423        } else {
424            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
425            panic!(
426                concat!(
427                    "Tried to set display timing properties ",
428                    "without the corresponding feature ({:?}) enabled."
429                ),
430                features
431            );
432        }
433    }
434}
435
/// A swapchain image wrapped as a [`Texture`], together with the semaphores
/// used to synchronize with it.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// NOTE(review): presumably the index of this image within the swapchain
    /// — confirm where the value is assigned.
    index: u32,
    /// The texture exposed through the `Borrow` implementations.
    texture: Texture,
    /// Shared handle to the semaphores for this image.
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}
442
// Empty impl: nothing of `DynSurfaceTexture` is overridden here.
impl crate::DynSurfaceTexture for SurfaceTexture {}

// Lets a `SurfaceTexture` be borrowed as the `Texture` it wraps.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same as above, but as a `DynTexture` trait object.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
456
/// A physical device plus the capability information stored with it.
pub struct Adapter {
    /// The physical device handle.
    raw: vk::PhysicalDevice,
    /// The shared state of the instance this adapter is tied to.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities that select code paths; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds; see [`Workarounds`].
    workarounds: Workarounds,
}
468
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Where the functions of an extension should be taken from.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
476
/// Optional device-level extension function tables.
///
/// NOTE(review): presumably a field is `None` when the corresponding
/// extension is unavailable or was not enabled — confirm at construction.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// Either a loaded extension table or a marker that the core functions
    /// should be used; see [`ExtensionFn`].
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
484
/// The two extension function tables that are stored together for ray
/// tracing.
struct RayTracingDeviceExtensionFunctions {
    /// Function table of `khr::acceleration_structure`.
    acceleration_structure: khr::acceleration_structure::Device,
    /// Function table of `khr::buffer_device_address`.
    buffer_device_address: khr::buffer_device_address::Device,
}
489
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
///
/// A copy is stored on both [`Adapter`] and `DeviceShared`.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    /// NOTE(review): presumably true when timeline semaphores are usable,
    /// which the module docs say are then used 1:1 with fences — confirm.
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
560
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// A set of these is stored on both [`Adapter`] and `DeviceShared`.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then NVIDIA will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
594
/// Hashable description of one attachment: its format, layout and ops.
///
/// Used as a building block of [`RenderPassKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
601
602impl AttachmentKey {
603    /// Returns an attachment key for a compatible attachment.
604    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
605        Self {
606            format,
607            layout,
608            ops: crate::AttachmentOps::all(),
609        }
610    }
611}
612
/// Key for one color attachment and its optional resolve attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    /// The color attachment itself.
    base: AttachmentKey,
    /// The resolve attachment, if there is one.
    resolve: Option<AttachmentKey>,
}
618
/// Key for the depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    /// Format, layout and ops of the attachment.
    /// NOTE(review): `base.ops` presumably holds the depth ops, given the
    /// separate `stencil_ops` below — confirm.
    base: AttachmentKey,
    /// Ops for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
624
/// Hash-map key under which `DeviceShared::render_passes` stores a
/// `vk::RenderPass`.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// Color attachment slots, at most `crate::MAX_COLOR_ATTACHMENTS`; a
    /// slot may be `None`.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    /// The depth/stencil attachment, if any.
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
632
/// Device state held behind an `Arc` by [`Device`], [`Queue`] and
/// `Swapchain`.
///
/// Its `Drop` implementation destroys the cached render passes and, when
/// `drop_guard` is `None`, the device itself.
struct DeviceShared {
    /// The `ash` device wrapper.
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// When `Some`, `Drop` does not call `destroy_device`.
    drop_guard: Option<crate::DropGuard>,
    /// The shared state of the instance this device is tied to.
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render passes keyed by [`RenderPassKey`]; destroyed in `Drop`.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,
}
653
654impl Drop for DeviceShared {
655    fn drop(&mut self) {
656        for &raw in self.render_passes.lock().values() {
657            unsafe { self.raw.destroy_render_pass(raw, None) };
658        }
659        if self.drop_guard.is_none() {
660            unsafe { self.raw.destroy_device(None) };
661        }
662    }
663}
664
/// The Vulkan backend's device type: the shared device state plus the
/// memory and descriptor allocators.
///
/// Its `Drop` implementation calls `cleanup` on both allocators.
pub struct Device {
    /// The shared device state.
    shared: Arc<DeviceShared>,
    /// Allocator for `vk::DeviceMemory` blocks.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor sets and the pools backing them.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// NOTE(review): presumably a bitmask of the memory type indices that
    /// may be used — confirm where it is computed.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
676
677impl Drop for Device {
678    fn drop(&mut self) {
679        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
680        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
681    }
682}
683
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    ///
    /// Once set, `advance` exchanges this with [`signal`] on every call.
    ///
    /// [`signal`]: RelaySemaphores::signal
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
717
718impl RelaySemaphores {
719    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
720        Ok(Self {
721            wait: None,
722            signal: device.new_binary_semaphore()?,
723        })
724    }
725
726    /// Advances the semaphores, returning the semaphores that should be used for a submission.
727    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
728        let old = self.clone();
729
730        // Build the state for the next submission.
731        match self.wait {
732            None => {
733                // The `old` values describe the first submission to this queue.
734                // The second submission should wait on `old.signal`, and then
735                // signal a new semaphore which we'll create now.
736                self.wait = Some(old.signal);
737                self.signal = device.new_binary_semaphore()?;
738            }
739            Some(ref mut wait) => {
740                // What this submission signals, the next should wait.
741                mem::swap(wait, &mut self.signal);
742            }
743        };
744
745        Ok(old)
746    }
747
748    /// Destroys the semaphores.
749    unsafe fn destroy(&self, device: &ash::Device) {
750        unsafe {
751            if let Some(wait) = self.wait {
752                device.destroy_semaphore(wait, None);
753            }
754            device.destroy_semaphore(self.signal, None);
755        }
756    }
757}
758
/// The Vulkan backend's queue type.
///
/// Its `Drop` implementation destroys the relay semaphores.
pub struct Queue {
    /// The queue handle, also returned by `as_raw`.
    raw: vk::Queue,
    /// Device-level function table of the swapchain extension.
    swapchain_fn: khr::swapchain::Device,
    /// The shared state of the device this queue is tied to.
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Semaphores that keep submissions ordered; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
}
767
impl Queue {
    /// Return the underlying Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
773
774impl Drop for Queue {
775    fn drop(&mut self) {
776        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
777    }
778}
/// The memory behind a [`Buffer`].
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A block obtained from the `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A range of a raw `vk::DeviceMemory` object, as passed to
    /// `Buffer::from_raw_managed`.
    VulkanMemory {
        /// The memory object.
        memory: vk::DeviceMemory,
        /// Offset of the range within `memory`.
        offset: u64,
        /// Size of the range.
        size: u64,
    },
}
788impl BufferMemoryBacking {
789    fn memory(&self) -> &vk::DeviceMemory {
790        match self {
791            Self::Managed(m) => m.memory(),
792            Self::VulkanMemory { memory, .. } => memory,
793        }
794    }
795    fn offset(&self) -> u64 {
796        match self {
797            Self::Managed(m) => m.offset(),
798            Self::VulkanMemory { offset, .. } => *offset,
799        }
800    }
801    fn size(&self) -> u64 {
802        match self {
803            Self::Managed(m) => m.size(),
804            Self::VulkanMemory { size, .. } => *size,
805        }
806    }
807}
/// The Vulkan backend's buffer type.
#[derive(Debug)]
pub struct Buffer {
    /// The buffer handle.
    raw: vk::Buffer,
    /// The memory backing the buffer. `None` for buffers created with
    /// `Buffer::from_raw`, whose memory is managed by the caller.
    block: Option<Mutex<BufferMemoryBacking>>,
}
813impl Buffer {
814    /// # Safety
815    ///
816    /// - `vk_buffer`'s memory must be managed by the caller
817    /// - Externally imported buffers can't be mapped by `wgpu`
818    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
819        Self {
820            raw: vk_buffer,
821            block: None,
822        }
823    }
824    /// # Safety
825    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
826    /// - Externally imported buffers can't be mapped by `wgpu`
827    /// - `offset` and `size` must be valid with the allocation of `memory`
828    pub unsafe fn from_raw_managed(
829        vk_buffer: vk::Buffer,
830        memory: vk::DeviceMemory,
831        offset: u64,
832        size: u64,
833    ) -> Self {
834        Self {
835            raw: vk_buffer,
836            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
837                memory,
838                offset,
839                size,
840            })),
841        }
842    }
843}
844
845impl crate::DynBuffer for Buffer {}
846
847#[derive(Debug)]
848pub struct AccelerationStructure {
849    raw: vk::AccelerationStructureKHR,
850    buffer: vk::Buffer,
851    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
852    compacted_size_query: Option<vk::QueryPool>,
853}
854
855impl crate::DynAccelerationStructure for AccelerationStructure {}
856
857#[derive(Debug)]
858pub struct Texture {
859    raw: vk::Image,
860    drop_guard: Option<crate::DropGuard>,
861    external_memory: Option<vk::DeviceMemory>,
862    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
863    format: wgt::TextureFormat,
864    copy_size: crate::CopyExtent,
865}
866
867impl crate::DynTexture for Texture {}
868
869impl Texture {
870    /// # Safety
871    ///
872    /// - The image handle must not be manually destroyed
873    pub unsafe fn raw_handle(&self) -> vk::Image {
874        self.raw
875    }
876}
877
878#[derive(Debug)]
879pub struct TextureView {
880    raw_texture: vk::Image,
881    raw: vk::ImageView,
882    layers: NonZeroU32,
883    format: wgt::TextureFormat,
884    raw_format: vk::Format,
885    base_mip_level: u32,
886    dimension: wgt::TextureViewDimension,
887}
888
889impl crate::DynTextureView for TextureView {}
890
891impl TextureView {
892    /// # Safety
893    ///
894    /// - The image view handle must not be manually destroyed
895    pub unsafe fn raw_handle(&self) -> vk::ImageView {
896        self.raw
897    }
898}
899
900#[derive(Debug)]
901pub struct Sampler {
902    raw: vk::Sampler,
903    create_info: vk::SamplerCreateInfo<'static>,
904}
905
906impl crate::DynSampler for Sampler {}
907
908#[derive(Debug)]
909pub struct BindGroupLayout {
910    raw: vk::DescriptorSetLayout,
911    desc_count: gpu_descriptor::DescriptorTotalCount,
912    types: Box<[(vk::DescriptorType, u32)]>,
913    /// Map of binding index to size,
914    binding_arrays: Vec<(u32, NonZeroU32)>,
915}
916
917impl crate::DynBindGroupLayout for BindGroupLayout {}
918
919#[derive(Debug)]
920pub struct PipelineLayout {
921    raw: vk::PipelineLayout,
922    binding_arrays: naga::back::spv::BindingMap,
923}
924
925impl crate::DynPipelineLayout for PipelineLayout {}
926
927#[derive(Debug)]
928pub struct BindGroup {
929    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
930}
931
932impl crate::DynBindGroup for BindGroup {}
933
934/// Miscellaneous allocation recycling pool for `CommandAllocator`.
935#[derive(Default)]
936struct Temp {
937    marker: Vec<u8>,
938    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
939    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
940}
941
942impl Temp {
943    fn clear(&mut self) {
944        self.marker.clear();
945        self.buffer_barriers.clear();
946        self.image_barriers.clear();
947    }
948
949    fn make_c_str(&mut self, name: &str) -> &CStr {
950        self.marker.clear();
951        self.marker.extend_from_slice(name.as_bytes());
952        self.marker.push(0);
953        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
954    }
955}
956
957#[derive(Clone, Eq, Hash, PartialEq)]
958struct FramebufferKey {
959    raw_pass: vk::RenderPass,
960    attachments: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
961    extent: wgt::Extent3d,
962}
963
964#[derive(Clone, Eq, Hash, PartialEq)]
965struct TempTextureViewKey {
966    texture: vk::Image,
967    format: vk::Format,
968    mip_level: u32,
969    depth_slice: u32,
970}
971
972pub struct CommandEncoder {
973    raw: vk::CommandPool,
974    device: Arc<DeviceShared>,
975
976    /// The current command buffer, if `self` is in the ["recording"]
977    /// state.
978    ///
979    /// ["recording"]: crate::CommandEncoder
980    ///
981    /// If non-`null`, the buffer is in the Vulkan "recording" state.
982    active: vk::CommandBuffer,
983
984    /// What kind of pass we are currently within: compute or render.
985    bind_point: vk::PipelineBindPoint,
986
987    /// Allocation recycling pool for this encoder.
988    temp: Temp,
989
990    /// A pool of available command buffers.
991    ///
992    /// These are all in the Vulkan "initial" state.
993    free: Vec<vk::CommandBuffer>,
994
995    /// A pool of discarded command buffers.
996    ///
997    /// These could be in any Vulkan state except "pending".
998    discarded: Vec<vk::CommandBuffer>,
999
1000    /// If this is true, the active renderpass enabled a debug span,
1001    /// and needs to be disabled on renderpass close.
1002    rpass_debug_marker_active: bool,
1003
1004    /// If set, the end of the next render/compute pass will write a timestamp at
1005    /// the given pool & location.
1006    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
1007
1008    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
1009    temp_texture_views: FastHashMap<TempTextureViewKey, vk::ImageView>,
1010
1011    counters: Arc<wgt::HalCounters>,
1012}
1013
1014impl Drop for CommandEncoder {
1015    fn drop(&mut self) {
1016        // SAFETY:
1017        //
1018        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1019        // `CommandBuffer` must live until its execution is complete, and that a
1020        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1021        // Thus, we know that none of our `CommandBuffers` are in the "pending"
1022        // state.
1023        //
1024        // The other VUIDs are pretty obvious.
1025        unsafe {
1026            // `vkDestroyCommandPool` also frees any command buffers allocated
1027            // from that pool, so there's no need to explicitly call
1028            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1029            // fields.
1030            self.device.raw.destroy_command_pool(self.raw, None);
1031        }
1032
1033        for (_, fb) in self.framebuffers.drain() {
1034            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1035        }
1036
1037        for (_, view) in self.temp_texture_views.drain() {
1038            unsafe { self.device.raw.destroy_image_view(view, None) };
1039        }
1040
1041        self.counters.command_encoders.sub(1);
1042    }
1043}
1044
1045impl CommandEncoder {
1046    /// # Safety
1047    ///
1048    /// - The command buffer handle must not be manually destroyed
1049    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
1050        self.active
1051    }
1052}
1053
1054impl fmt::Debug for CommandEncoder {
1055    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1056        f.debug_struct("CommandEncoder")
1057            .field("raw", &self.raw)
1058            .finish()
1059    }
1060}
1061
1062#[derive(Debug)]
1063pub struct CommandBuffer {
1064    raw: vk::CommandBuffer,
1065}
1066
1067impl crate::DynCommandBuffer for CommandBuffer {}
1068
1069#[derive(Debug)]
1070#[allow(clippy::large_enum_variant)]
1071pub enum ShaderModule {
1072    Raw(vk::ShaderModule),
1073    Intermediate {
1074        naga_shader: crate::NagaShader,
1075        runtime_checks: wgt::ShaderRuntimeChecks,
1076    },
1077}
1078
1079impl crate::DynShaderModule for ShaderModule {}
1080
1081#[derive(Debug)]
1082pub struct RenderPipeline {
1083    raw: vk::Pipeline,
1084}
1085
1086impl crate::DynRenderPipeline for RenderPipeline {}
1087
1088#[derive(Debug)]
1089pub struct ComputePipeline {
1090    raw: vk::Pipeline,
1091}
1092
1093impl crate::DynComputePipeline for ComputePipeline {}
1094
1095#[derive(Debug)]
1096pub struct PipelineCache {
1097    raw: vk::PipelineCache,
1098}
1099
1100impl crate::DynPipelineCache for PipelineCache {}
1101
1102#[derive(Debug)]
1103pub struct QuerySet {
1104    raw: vk::QueryPool,
1105}
1106
1107impl crate::DynQuerySet for QuerySet {}
1108
1109/// The [`Api::Fence`] type for [`vulkan::Api`].
1110///
1111/// This is an `enum` because there are two possible implementations of
1112/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
1113/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
1114/// require non-1.0 features.
1115///
1116/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
1117/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
1118/// otherwise.
1119///
1120/// [`Api::Fence`]: crate::Api::Fence
1121/// [`vulkan::Api`]: Api
1122/// [`Device::create_fence`]: crate::Device::create_fence
1123/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
1124/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
1125/// [`FencePool`]: Fence::FencePool
1126#[derive(Debug)]
1127pub enum Fence {
1128    /// A Vulkan [timeline semaphore].
1129    ///
1130    /// These are simpler to use than Vulkan fences, since timeline semaphores
1131    /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work.
1132    ///
1133    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
1134    /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence
1135    TimelineSemaphore(vk::Semaphore),
1136
1137    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
1138    ///
1139    /// The effective [`FenceValue`] of this variant is the greater of
1140    /// `last_completed` and the maximum value associated with a signalled fence
1141    /// in `active`.
1142    ///
1143    /// Fences are available in all versions of Vulkan, but since they only have
1144    /// two states, "signaled" and "unsignaled", we need to use a separate fence
1145    /// for each queue submission we might want to wait for, and remember which
1146    /// [`FenceValue`] each one represents.
1147    ///
1148    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
1149    /// [`FenceValue`]: crate::FenceValue
1150    FencePool {
1151        last_completed: crate::FenceValue,
1152        /// The pending fence values have to be ascending.
1153        active: Vec<(crate::FenceValue, vk::Fence)>,
1154        free: Vec<vk::Fence>,
1155    },
1156}
1157
1158impl crate::DynFence for Fence {}
1159
1160impl Fence {
1161    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1162    ///
1163    /// As an optimization, assume that we already know that the fence has
1164    /// reached `last_completed`, and don't bother checking fences whose values
1165    /// are less than that: those fences remain in the `active` array only
1166    /// because we haven't called `maintain` yet to clean them up.
1167    ///
1168    /// [`FenceValue`]: crate::FenceValue
1169    fn check_active(
1170        device: &ash::Device,
1171        mut last_completed: crate::FenceValue,
1172        active: &[(crate::FenceValue, vk::Fence)],
1173    ) -> Result<crate::FenceValue, crate::DeviceError> {
1174        for &(value, raw) in active.iter() {
1175            unsafe {
1176                if value > last_completed
1177                    && device
1178                        .get_fence_status(raw)
1179                        .map_err(map_host_device_oom_and_lost_err)?
1180                {
1181                    last_completed = value;
1182                }
1183            }
1184        }
1185        Ok(last_completed)
1186    }
1187
1188    /// Return the highest signalled [`FenceValue`] for `self`.
1189    ///
1190    /// [`FenceValue`]: crate::FenceValue
1191    fn get_latest(
1192        &self,
1193        device: &ash::Device,
1194        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1195    ) -> Result<crate::FenceValue, crate::DeviceError> {
1196        match *self {
1197            Self::TimelineSemaphore(raw) => unsafe {
1198                Ok(match *extension.unwrap() {
1199                    ExtensionFn::Extension(ref ext) => ext
1200                        .get_semaphore_counter_value(raw)
1201                        .map_err(map_host_device_oom_and_lost_err)?,
1202                    ExtensionFn::Promoted => device
1203                        .get_semaphore_counter_value(raw)
1204                        .map_err(map_host_device_oom_and_lost_err)?,
1205                })
1206            },
1207            Self::FencePool {
1208                last_completed,
1209                ref active,
1210                free: _,
1211            } => Self::check_active(device, last_completed, active),
1212        }
1213    }
1214
1215    /// Trim the internal state of this [`Fence`].
1216    ///
1217    /// This function has no externally visible effect, but you should call it
1218    /// periodically to keep this fence's resource consumption under control.
1219    ///
1220    /// For fences using the [`FencePool`] implementation, this function
1221    /// recycles fences that have been signaled. If you don't call this,
1222    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1223    /// time it's called.
1224    ///
1225    /// [`FencePool`]: Fence::FencePool
1226    /// [`Queue::submit`]: crate::Queue::submit
1227    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1228        match *self {
1229            Self::TimelineSemaphore(_) => {}
1230            Self::FencePool {
1231                ref mut last_completed,
1232                ref mut active,
1233                ref mut free,
1234            } => {
1235                let latest = Self::check_active(device, *last_completed, active)?;
1236                let base_free = free.len();
1237                for &(value, raw) in active.iter() {
1238                    if value <= latest {
1239                        free.push(raw);
1240                    }
1241                }
1242                if free.len() != base_free {
1243                    active.retain(|&(value, _)| value > latest);
1244                    unsafe { device.reset_fences(&free[base_free..]) }
1245                        .map_err(map_device_oom_err)?
1246                }
1247                *last_completed = latest;
1248            }
1249        }
1250        Ok(())
1251    }
1252}
1253
1254impl crate::Queue for Queue {
1255    type A = Api;
1256
1257    unsafe fn submit(
1258        &self,
1259        command_buffers: &[&CommandBuffer],
1260        surface_textures: &[&SurfaceTexture],
1261        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
1262    ) -> Result<(), crate::DeviceError> {
1263        let mut fence_raw = vk::Fence::null();
1264
1265        let mut wait_stage_masks = Vec::new();
1266        let mut wait_semaphores = Vec::new();
1267        let mut signal_semaphores = SemaphoreList::default();
1268
1269        // Double check that the same swapchain image isn't being given to us multiple times,
1270        // as that will deadlock when we try to lock them all.
1271        debug_assert!(
1272            {
1273                let mut check = HashSet::with_capacity(surface_textures.len());
1274                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
1275                for st in surface_textures {
1276                    check.insert(Arc::as_ptr(&st.surface_semaphores));
1277                }
1278                check.len() == surface_textures.len()
1279            },
1280            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
1281        );
1282
1283        let locked_swapchain_semaphores = surface_textures
1284            .iter()
1285            .map(|st| {
1286                st.surface_semaphores
1287                    .try_lock()
1288                    .expect("Failed to lock surface semaphore.")
1289            })
1290            .collect::<Vec<_>>();
1291
1292        for mut swapchain_semaphore in locked_swapchain_semaphores {
1293            swapchain_semaphore.set_used_fence_value(signal_value);
1294
1295            // If we're the first submission to operate on this image, wait on
1296            // its acquire semaphore, to make sure the presentation engine is
1297            // done with it.
1298            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
1299                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1300                wait_semaphores.push(sem);
1301            }
1302
1303            // Get a semaphore to signal when we're done writing to this surface
1304            // image. Presentation of this image will wait for this.
1305            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
1306            signal_semaphores.push_binary(signal_semaphore);
1307        }
1308
1309        let mut guard = self.signal_semaphores.lock();
1310        if !guard.is_empty() {
1311            signal_semaphores.append(&mut guard);
1312        }
1313
1314        // In order for submissions to be strictly ordered, we encode a dependency between each submission
1315        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
1316        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
1317
1318        if let Some(sem) = semaphore_state.wait {
1319            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1320            wait_semaphores.push(sem);
1321        }
1322
1323        signal_semaphores.push_binary(semaphore_state.signal);
1324
1325        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
1326        signal_fence.maintain(&self.device.raw)?;
1327        match *signal_fence {
1328            Fence::TimelineSemaphore(raw) => {
1329                signal_semaphores.push_timeline(raw, signal_value);
1330            }
1331            Fence::FencePool {
1332                ref mut active,
1333                ref mut free,
1334                ..
1335            } => {
1336                fence_raw = match free.pop() {
1337                    Some(raw) => raw,
1338                    None => unsafe {
1339                        self.device
1340                            .raw
1341                            .create_fence(&vk::FenceCreateInfo::default(), None)
1342                            .map_err(map_host_device_oom_err)?
1343                    },
1344                };
1345                active.push((signal_value, fence_raw));
1346            }
1347        }
1348
1349        let vk_cmd_buffers = command_buffers
1350            .iter()
1351            .map(|cmd| cmd.raw)
1352            .collect::<Vec<_>>();
1353
1354        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
1355
1356        vk_info = vk_info
1357            .wait_semaphores(&wait_semaphores)
1358            .wait_dst_stage_mask(&wait_stage_masks);
1359
1360        let mut vk_timeline_info = mem::MaybeUninit::uninit();
1361        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);
1362
1363        profiling::scope!("vkQueueSubmit");
1364        unsafe {
1365            self.device
1366                .raw
1367                .queue_submit(self.raw, &[vk_info], fence_raw)
1368                .map_err(map_host_device_oom_and_lost_err)?
1369        };
1370        Ok(())
1371    }
1372
1373    unsafe fn present(
1374        &self,
1375        surface: &Surface,
1376        texture: SurfaceTexture,
1377    ) -> Result<(), crate::SurfaceError> {
1378        let mut swapchain = surface.swapchain.write();
1379        let ssc = swapchain.as_mut().unwrap();
1380        let mut swapchain_semaphores = texture.surface_semaphores.lock();
1381
1382        let swapchains = [ssc.raw];
1383        let image_indices = [texture.index];
1384        let vk_info = vk::PresentInfoKHR::default()
1385            .swapchains(&swapchains)
1386            .image_indices(&image_indices)
1387            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());
1388
1389        let mut display_timing;
1390        let present_times;
1391        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
1392            debug_assert!(
1393                ssc.device
1394                    .features
1395                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
1396                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
1397            );
1398            present_times = [present_time];
1399            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
1400            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
1401            vk_info.push_next(&mut display_timing)
1402        } else {
1403            vk_info
1404        };
1405
1406        let suboptimal = {
1407            profiling::scope!("vkQueuePresentKHR");
1408            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
1409                match error {
1410                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
1411                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
1412                    // We don't use VK_EXT_full_screen_exclusive
1413                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
1414                    _ => map_host_device_oom_and_lost_err(error).into(),
1415                }
1416            })?
1417        };
1418        if suboptimal {
1419            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
1420            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
1421            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
1422            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
1423            #[cfg(not(target_os = "android"))]
1424            log::warn!("Suboptimal present of frame {}", texture.index);
1425        }
1426        Ok(())
1427    }
1428
1429    unsafe fn get_timestamp_period(&self) -> f32 {
1430        self.device.timestamp_period
1431    }
1432}
1433
1434impl Queue {
1435    pub fn raw_device(&self) -> &ash::Device {
1436        &self.device.raw
1437    }
1438
1439    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1440        let mut guard = self.signal_semaphores.lock();
1441        if let Some(value) = semaphore_value {
1442            guard.push_timeline(semaphore, value);
1443        } else {
1444            guard.push_binary(semaphore);
1445        }
1446    }
1447}
1448
1449/// Maps
1450///
1451/// - VK_ERROR_OUT_OF_HOST_MEMORY
1452/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1453fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1454    match err {
1455        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1456            get_oom_err(err)
1457        }
1458        e => get_unexpected_err(e),
1459    }
1460}
1461
1462/// Maps
1463///
1464/// - VK_ERROR_OUT_OF_HOST_MEMORY
1465/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1466/// - VK_ERROR_DEVICE_LOST
1467fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1468    match err {
1469        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1470        other => map_host_device_oom_err(other),
1471    }
1472}
1473
1474/// Maps
1475///
1476/// - VK_ERROR_OUT_OF_HOST_MEMORY
1477/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1478/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1479fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1480    // We don't use VK_KHR_buffer_device_address
1481    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1482    map_host_device_oom_err(err)
1483}
1484
1485/// Maps
1486///
1487/// - VK_ERROR_OUT_OF_HOST_MEMORY
1488fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1489    match err {
1490        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1491        e => get_unexpected_err(e),
1492    }
1493}
1494
1495/// Maps
1496///
1497/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1498fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1499    match err {
1500        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1501        e => get_unexpected_err(e),
1502    }
1503}
1504
1505/// Maps
1506///
1507/// - VK_ERROR_OUT_OF_HOST_MEMORY
1508/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1509fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1510    // We don't use VK_KHR_buffer_device_address
1511    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1512    map_host_oom_err(err)
1513}
1514
1515/// Maps
1516///
1517/// - VK_ERROR_OUT_OF_HOST_MEMORY
1518/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1519/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
1520/// - VK_ERROR_INVALID_SHADER_NV
1521fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
1522    // We don't use VK_EXT_pipeline_creation_cache_control
1523    // VK_PIPELINE_COMPILE_REQUIRED_EXT
1524    // We don't use VK_NV_glsl_shader
1525    // VK_ERROR_INVALID_SHADER_NV
1526    map_host_device_oom_err(err)
1527}
1528
1529/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1530/// feature flag is enabled.
1531fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1532    #[cfg(feature = "internal_error_panic")]
1533    panic!("Unexpected Vulkan error: {_err:?}");
1534
1535    #[allow(unreachable_code)]
1536    crate::DeviceError::Unexpected
1537}
1538
1539/// Returns [`crate::DeviceError::OutOfMemory`].
1540fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
1541    crate::DeviceError::OutOfMemory
1542}
1543
1544/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1545/// feature flag is enabled.
1546fn get_lost_err() -> crate::DeviceError {
1547    #[cfg(feature = "device_lost_panic")]
1548    panic!("Device lost");
1549
1550    #[allow(unreachable_code)]
1551    crate::DeviceError::Lost
1552}
1553
1554#[derive(Clone, Copy, Pod, Zeroable)]
1555#[repr(C)]
1556struct RawTlasInstance {
1557    transform: [f32; 12],
1558    custom_data_and_mask: u32,
1559    shader_binding_table_record_offset_and_flags: u32,
1560    acceleration_structure_reference: u64,
1561}
1562
1563/// Arguments to the [`CreateDeviceCallback`].
1564pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
1565where
1566    'this: 'pnext,
1567{
1568    /// The extensions to enable for the device. You must not remove anything from this list,
1569    /// but you may add to it.
1570    pub extensions: &'arg mut Vec<&'static CStr>,
1571    /// The physical device features to enable. You may enable features, but must not disable any.
1572    pub device_features: &'arg mut PhysicalDeviceFeatures,
1573    /// The queue create infos for the device. You may add or modify queue create infos as needed.
1574    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
1575    /// The create info for the device. You may add or modify things in the pnext chain, but
1576    /// do not turn features off. Additionally, do not add things to the list of extensions,
1577    /// or to the feature set, as all changes to that member will be overwritten.
1578    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
1579    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1580    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1581    /// don't actually directly use `'this`
1582    _phantom: PhantomData<&'this ()>,
1583}
1584
1585/// Callback to allow changing the vulkan device creation parameters.
1586///
1587/// # Safety:
1588/// - If you want to add extensions, add the to the `Vec<'static CStr>` not the create info,
1589///   as the create info value will be overwritten.
1590/// - Callback must not remove features.
1591/// - Callback must not change anything to what the instance does not support.
1592pub type CreateDeviceCallback<'this> =
1593    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1594
1595/// Arguments to the [`CreateInstanceCallback`].
1596pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
1597where
1598    'this: 'pnext,
1599{
1600    /// The extensions to enable for the instance. You must not remove anything from this list,
1601    /// but you may add to it.
1602    pub extensions: &'arg mut Vec<&'static CStr>,
1603    /// The create info for the instance. You may add or modify things in the pnext chain, but
1604    /// do not turn features off. Additionally, do not add things to the list of extensions,
1605    /// all changes to that member will be overwritten.
1606    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
1607    /// Vulkan entry point.
1608    pub entry: &'arg ash::Entry,
1609    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1610    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1611    /// don't actually directly use `'this`
1612    _phantom: PhantomData<&'this ()>,
1613}
1614
1615/// Callback to allow changing the vulkan instance creation parameters.
1616///
1617/// # Safety:
1618/// - If you want to add extensions, add the to the `Vec<'static CStr>` not the create info,
1619///   as the create info value will be overwritten.
1620/// - Callback must not remove features.
1621/// - Callback must not change anything to what the instance does not support.
1622pub type CreateInstanceCallback<'this> =
1623    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;