wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement through a combination of the following approaches:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
15Framebuffers are also cached on the device, but they are removed when
16any of the image views (they have) gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27#![allow(clippy::std_instead_of_alloc, clippy::std_instead_of_core)]
28
29mod adapter;
30mod command;
31mod conv;
32mod device;
33mod drm;
34mod instance;
35mod sampler;
36
37use std::{
38    borrow::Borrow,
39    boxed::Box,
40    ffi::{CStr, CString},
41    fmt, mem,
42    num::NonZeroU32,
43    ops::DerefMut,
44    sync::Arc,
45    vec::Vec,
46};
47
48use arrayvec::ArrayVec;
49use ash::{ext, khr, vk};
50use bytemuck::{Pod, Zeroable};
51use hashbrown::HashSet;
52use parking_lot::{Mutex, RwLock};
53
54use naga::FastHashMap;
55use wgt::InternalCounter;
56
/// Conversion factor from milliseconds to nanoseconds.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Upper bound on attachments in a framebuffer key: one base plus one resolve
/// attachment per color slot, plus a single depth/stencil attachment.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
59
/// The Vulkan backend's marker type implementing [`crate::Api`].
#[derive(Clone, Debug)]
pub struct Api;

/// Wires every HAL object type to its Vulkan-backed implementation in this module.
impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
90
// Generate the type-erased (`Dyn*`) resource plumbing for every resource type
// this backend exposes — presumably `crate::impl_dyn_resource!` implements the
// dynamic-dispatch resource traits; see the macro's definition in the crate root.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
115
/// State for an active `VK_EXT_debug_utils` messenger.
struct DebugUtils {
    /// Loaded instance-level entry points for the extension.
    extension: ext::debug_utils::Instance,
    /// The registered messenger handle.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Parameters used to create the debug-utils messenger.
pub struct DebugUtilsCreateInfo {
    /// Message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message categories the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback via `pUserData`; ownership moves into [`DebugUtils`].
    callback_data: Box<DebugUtilsMessengerUserData>,
}
134
#[derive(Debug)]
/// The properties related to the validation layer needed for the
/// DebugUtilsMessenger for their workarounds
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
159
/// Instance-level state shared between the [`Instance`] and everything created from it.
pub struct InstanceShared {
    /// The raw `ash` instance handle.
    raw: ash::Instance,
    /// The instance extensions that were enabled at creation time.
    extensions: Vec<&'static CStr>,
    /// Present when the `VkInstance` is owned externally — presumably suppresses
    /// destruction on drop, analogous to `DeviceShared::drop`; TODO confirm.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    /// Debug-utils messenger state, if the extension was enabled.
    debug_utils: Option<DebugUtils>,
    /// Loaded `VK_KHR_get_physical_device_properties2` entry points, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    /// Whether the NVIDIA Optimus layer was detected — presumably used for
    /// adapter-selection workarounds; verify in `instance.rs`/`adapter.rs`.
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

/// The Vulkan HAL instance; a thin, shareable wrapper around [`InstanceShared`].
pub struct Instance {
    shared: Arc<InstanceShared>,
}
182
/// The semaphores needed to use one image in a swapchain.
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// Reset to zero at the end of each acquire/draw/present cycle by
    /// `get_present_wait_semaphores`.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}
268
269impl SwapchainImageSemaphores {
270    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
271        Ok(Self {
272            acquire: device.new_binary_semaphore()?,
273            should_wait_for_acquire: true,
274            present: Vec::new(),
275            present_index: 0,
276            previously_used_submission_index: 0,
277        })
278    }
279
280    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
281        self.previously_used_submission_index = value;
282    }
283
284    /// Return the semaphore that commands drawing to this image should wait for, if any.
285    ///
286    /// This only returns `Some` once per acquisition; see
287    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
288    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
289        if self.should_wait_for_acquire {
290            self.should_wait_for_acquire = false;
291            Some(self.acquire)
292        } else {
293            None
294        }
295    }
296
297    /// Return a semaphore that a submission that writes to this image should
298    /// signal when it's done.
299    ///
300    /// See [`SwapchainImageSemaphores::present`] for details.
301    fn get_submit_signal_semaphore(
302        &mut self,
303        device: &DeviceShared,
304    ) -> Result<vk::Semaphore, crate::DeviceError> {
305        // Try to recycle a semaphore we created for a previous presentation.
306        let sem = match self.present.get(self.present_index) {
307            Some(sem) => *sem,
308            None => {
309                let sem = device.new_binary_semaphore()?;
310                self.present.push(sem);
311                sem
312            }
313        };
314
315        self.present_index += 1;
316
317        Ok(sem)
318    }
319
320    /// Return the semaphores that a presentation of this image should wait on.
321    ///
322    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
323    /// ends this image's acquisition should wait for. See
324    /// [`SwapchainImageSemaphores::present`] for details.
325    ///
326    /// Reset `self` to be ready for the next acquisition cycle.
327    ///
328    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
329    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
330        let old_index = self.present_index;
331
332        // Since this marks the end of this acquire/draw/present cycle, take the
333        // opportunity to reset `self` in preparation for the next acquisition.
334        self.present_index = 0;
335        self.should_wait_for_acquire = true;
336
337        &self.present[0..old_index]
338    }
339
340    unsafe fn destroy(&self, device: &ash::Device) {
341        unsafe {
342            device.destroy_semaphore(self.acquire, None);
343            for sem in &self.present {
344                device.destroy_semaphore(*sem, None);
345            }
346        }
347    }
348}
349
/// An active Vulkan swapchain and the bookkeeping needed to use it.
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Flags the swapchain was created with (needed when recreating it — TODO confirm).
    raw_flags: vk::SwapchainCreateFlagsKHR,
    /// Loaded `VK_KHR_swapchain` device-level entry points.
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    /// The swapchain's images, indexed by the acquire-time image index.
    images: Vec<vk::Image>,
    /// The configuration this swapchain was created from.
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to specify the semaphore as an argument
    /// to the acquire_next_image function which is what tells us which image to use.
    next_semaphore_index: usize,
    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
376
377impl Swapchain {
378    fn advance_surface_semaphores(&mut self) {
379        let semaphore_count = self.surface_semaphores.len();
380        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
381    }
382
383    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
384        self.surface_semaphores[self.next_semaphore_index].clone()
385    }
386}
387
/// A presentable Vulkan surface and its (optional) current swapchain.
pub struct Surface {
    /// The raw surface handle.
    raw: vk::SurfaceKHR,
    /// Loaded `VK_KHR_surface` instance-level entry points.
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    /// `None` until the surface is configured; replaced on reconfiguration.
    swapchain: RwLock<Option<Swapchain>>,
}
394
395impl Surface {
396    /// Get the raw Vulkan swapchain associated with this surface.
397    ///
398    /// Returns [`None`] if the surface is not configured.
399    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
400        let read = self.swapchain.read();
401        read.as_ref().map(|it| it.raw)
402    }
403
404    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
405    /// using [VK_GOOGLE_display_timing].
406    ///
407    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
408    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
409    ///
410    /// This can also be used to add a "not before" timestamp to the presentation.
411    ///
412    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
413    ///
414    /// # Panics
415    ///
416    /// - If the surface hasn't been configured.
417    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
418    ///
419    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
420    #[track_caller]
421    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
422        let mut swapchain = self.swapchain.write();
423        let swapchain = swapchain
424            .as_mut()
425            .expect("Surface should have been configured");
426        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
427        if swapchain.device.features.contains(features) {
428            swapchain.next_present_time = Some(present_timing);
429        } else {
430            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
431            panic!(
432                concat!(
433                    "Tried to set display timing properties ",
434                    "without the corresponding feature ({:?}) enabled."
435                ),
436                features
437            );
438        }
439    }
440}
441
/// A swapchain image handed out by `Surface`, plus the semaphores needed to
/// synchronize its use at submit/present time.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// The image's index within the swapchain.
    index: u32,
    texture: Texture,
    /// Shared with the owning `Swapchain`; consumed during submission/presentation.
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allow a `SurfaceTexture` to be used wherever a `&Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same as above, but for the type-erased texture trait object.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
462
/// A Vulkan physical device plus everything queried from it at enumeration time.
pub struct Adapter {
    /// The raw physical-device handle.
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// Memory property flags the backend knows how to handle.
    known_memory_flags: vk::MemoryPropertyFlags,
    /// Cached physical-device properties (limits, extension info, …).
    phd_capabilities: adapter::PhysicalDeviceProperties,
    /// Cached physical-device feature structs.
    phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Distinguishes an extension's loaded entry points from a core-promoted one.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

/// Optional device-level extension entry points, loaded if the extension was enabled.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}

/// Entry points for the extensions required by ray tracing.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
495
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    /// True if image-less framebuffers are usable; when set, the actual image
    /// views are excluded from the framebuffer cache key (see the module docs).
    imageless_framebuffers: bool,
    /// Presumably whether per-view usage flags (`VK_KHR_maintenance2`-style)
    /// can be specified at view creation — TODO confirm in `device.rs`.
    image_view_usage: bool,
    /// True if timeline semaphores are available; they are then used 1:1 with
    /// wgpu-hal fences (see the module docs on fences).
    timeline_semaphores: bool,
    // Depth/stencil format availability flags — presumably filled from
    // physical-device format property queries; verify in `adapter.rs`.
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    /// Alignment mask for non-coherent memory maps — presumably derived from
    /// `nonCoherentAtomSize`; TODO confirm.
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// True if the adapter advertises the image-robustness analogue of
    /// `robustBufferAccess` — TODO confirm which feature bit this maps to.
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// The `VK_EXT_robustness2` image analogue of [`Self::robust_buffer_access2`].
    robust_image_access2: bool,
    /// Whether workgroup memory is zero-initialized by the driver — TODO confirm source.
    zero_initialize_workgroup_memory: bool,
    /// True if `VK_KHR_image_format_list` (or its core promotion) is usable.
    image_format_list: bool,
    /// Maximum number of live samplers the device supports.
    maximum_samplers: u32,
}
548
bitflags::bitflags!(
    /// Workaround flags for driver-specific bugs, decided per-adapter.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
582
/// Cache key describing a single render-pass attachment.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    /// Load/store operations for the attachment.
    ops: crate::AttachmentOps,
}
589
590impl AttachmentKey {
591    /// Returns an attachment key for a compatible attachment.
592    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
593        Self {
594            format,
595            layout,
596            ops: crate::AttachmentOps::all(),
597        }
598    }
599}
600
/// Cache key for one color attachment slot, with its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

/// Cache key for the depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    /// Stencil aspect ops, tracked separately from the depth aspect's `base.ops`.
    stencil_ops: crate::AttachmentOps,
}

/// Key for the device's render-pass cache (see module docs: passes are cached forever).
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

/// Per-attachment data stored in a framebuffer cache key.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: wgt::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

/// Key for the device's framebuffer cache (see module docs: entries are evicted
/// when any referenced image view is destroyed).
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}
637
/// Device-level state shared between the [`Device`], its [`Queue`], and resources.
struct DeviceShared {
    /// The raw `ash` device handle.
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// Present when the `VkDevice` is owned externally; if set, `drop` skips
    /// `destroy_device` (see the `Drop` impl below).
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    /// Device extensions that were enabled at creation time.
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    /// Key used to validate pipeline-cache blobs — presumably mixed into cache
    /// headers; verify in `device.rs`.
    pipeline_cache_validation_key: [u8; 16],
    /// Nanoseconds per timestamp tick, from device limits.
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render passes are cached here and kept for the device's lifetime (see module docs).
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    /// Framebuffer cache; entries are removed when a referenced view is destroyed (see module docs).
    framebuffers: Mutex<FastHashMap<FramebufferKey, vk::Framebuffer>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,
}
659
660impl Drop for DeviceShared {
661    fn drop(&mut self) {
662        for &raw in self.render_passes.lock().values() {
663            unsafe { self.raw.destroy_render_pass(raw, None) };
664        }
665        for &raw in self.framebuffers.lock().values() {
666            unsafe { self.raw.destroy_framebuffer(raw, None) };
667        }
668        if self.drop_guard.is_none() {
669            unsafe { self.raw.destroy_device(None) };
670        }
671    }
672}
673
/// The Vulkan HAL device: shared device state plus the memory and descriptor allocators.
pub struct Device {
    shared: Arc<DeviceShared>,
    /// General-purpose GPU memory allocator.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Descriptor-set allocator backed by Vulkan descriptor pools.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// Bitmask of memory types usable through `gpu_alloc` — TODO confirm exact semantics.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend when compiling shaders for this device.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
685
impl Drop for Device {
    fn drop(&mut self) {
        // Release the allocators' remaining Vulkan objects against the raw
        // device (`&*self.shared` — `DeviceShared` presumably implements the
        // allocators' device traits; confirm in `device.rs`) before the
        // shared device itself can be destroyed.
        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}
692
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
726
727impl RelaySemaphores {
728    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
729        Ok(Self {
730            wait: None,
731            signal: device.new_binary_semaphore()?,
732        })
733    }
734
735    /// Advances the semaphores, returning the semaphores that should be used for a submission.
736    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
737        let old = self.clone();
738
739        // Build the state for the next submission.
740        match self.wait {
741            None => {
742                // The `old` values describe the first submission to this queue.
743                // The second submission should wait on `old.signal`, and then
744                // signal a new semaphore which we'll create now.
745                self.wait = Some(old.signal);
746                self.signal = device.new_binary_semaphore()?;
747            }
748            Some(ref mut wait) => {
749                // What this submission signals, the next should wait.
750                mem::swap(wait, &mut self.signal);
751            }
752        };
753
754        Ok(old)
755    }
756
757    /// Destroys the semaphores.
758    unsafe fn destroy(&self, device: &ash::Device) {
759        unsafe {
760            if let Some(wait) = self.wait {
761                device.destroy_semaphore(wait, None);
762            }
763            device.destroy_semaphore(self.signal, None);
764        }
765    }
766}
767
/// The Vulkan HAL queue wrapper.
pub struct Queue {
    raw: vk::Queue,
    /// Loaded `VK_KHR_swapchain` entry points, needed for presentation.
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Semaphores used to order successive submissions; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra (semaphore, value) pairs to signal on submission — presumably
    /// timeline semaphores given the paired `u64` values; TODO confirm at the
    /// submit call site.
    signal_semaphores: Mutex<(Vec<vk::Semaphore>, Vec<u64>)>,
}

impl Queue {
    /// Expose the underlying `VkQueue` handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        // The queue owns its relay semaphores; destroy them with the device's help.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
788
/// A Vulkan buffer plus its backing allocation.
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Backing memory block; `None` presumably for externally-backed buffers — TODO confirm.
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

impl crate::DynBuffer for Buffer {}
796
/// A ray-tracing acceleration structure with its backing buffer and memory.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    /// The buffer the acceleration structure lives in.
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Query pool used to read back the compacted size, when compaction is requested.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
806
/// A Vulkan image plus the metadata needed to create views and copies of it.
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// Present when the image is owned externally; presumably suppresses destruction — TODO confirm.
    drop_guard: Option<crate::DropGuard>,
    /// Imported external memory bound to the image, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Backing allocation; `None` when the memory is not ours (external/swapchain).
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: wgt::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
830
#[derive(Debug)]
pub struct TextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Number of array layers covered by this view.
    layers: NonZeroU32,
    /// Attachment description used when this view participates in a
    /// framebuffer key (see the module docs on framebuffers).
    attachment: FramebufferAttachment,
}

impl crate::DynTextureView for TextureView {}
839
impl TextureView {
    /// Returns the raw Vulkan image view handle.
    ///
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}
848
#[derive(Debug)]
pub struct Sampler {
    /// The raw Vulkan sampler handle.
    raw: vk::Sampler,
    /// The creation parameters, retained after creation — presumably
    /// as the key for the sampler cache in `sampler.rs`; confirm there.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
856
#[derive(Debug)]
pub struct BindGroupLayout {
    /// The raw descriptor set layout handle.
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts, used when allocating sets via `gpu_descriptor`.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Descriptor type and count per entry — TODO confirm the exact
    /// correspondence against the creation path in `device.rs`.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to binding-array size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
867
#[derive(Debug)]
pub struct PipelineLayout {
    /// The raw Vulkan pipeline layout handle.
    raw: vk::PipelineLayout,
    /// Binding map handed to naga's SPIR-V backend for binding arrays.
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
875
#[derive(Debug)]
pub struct BindGroup {
    /// The allocated descriptor set, owned by `gpu_descriptor`.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
882
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Scratch buffer for building NUL-terminated debug-marker strings.
    marker: Vec<u8>,
    /// Reusable storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Reusable storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
890
891impl Temp {
892    fn clear(&mut self) {
893        self.marker.clear();
894        self.buffer_barriers.clear();
895        self.image_barriers.clear();
896    }
897
898    fn make_c_str(&mut self, name: &str) -> &CStr {
899        self.marker.clear();
900        self.marker.extend_from_slice(name.as_bytes());
901        self.marker.push(0);
902        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
903    }
904}
905
/// The [`crate::Api::CommandEncoder`] type: a Vulkan command pool plus
/// the command buffer currently being recorded from it.
pub struct CommandEncoder {
    /// The command pool from which this encoder's buffers are allocated.
    raw: vk::CommandPool,
    /// The owning device; used to destroy the pool when the encoder drops.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Shared HAL counters; `command_encoders` is decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
944
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }
        // Keep the shared HAL command-encoder count in sync.
        self.counters.command_encoders.sub(1);
    }
}
966
impl CommandEncoder {
    /// Returns the active (currently recording) raw command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
975
976impl fmt::Debug for CommandEncoder {
977    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
978        f.debug_struct("CommandEncoder")
979            .field("raw", &self.raw)
980            .finish()
981    }
982}
983
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw command buffer; allocated from a `CommandEncoder`'s pool,
    /// which frees it, so this type has no `Drop`.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
990
/// A shader input for pipeline creation.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An already-created Vulkan shader module.
    Raw(vk::ShaderModule),
    /// Naga IR not yet compiled to SPIR-V — presumably compiled at
    /// pipeline creation time; confirm in `device.rs`.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1002
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1009
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1016
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1023
#[derive(Debug)]
pub struct QuerySet {
    /// The raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1030
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1081
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                // Only poll fences that could raise the completed value.
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                // Query the counter through the extension loader or, if the
                // feature was promoted to core, through the device directly.
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                // Remember where the newly recycled fences begin in `free`,
                // so only those are reset below.
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    // Reset just the fences moved into `free` this call.
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1175
impl crate::Queue for Queue {
    type A = Api;

    /// Submit `command_buffers`, synchronizing with the given
    /// `surface_textures`' semaphores and arranging for `signal_fence`
    /// to reach `signal_value` when the work completes.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        // Parallel arrays handed to `VkSubmitInfo`: each wait semaphore
        // pairs with a stage mask, each signal semaphore with a timeline
        // value (`!0` as a placeholder for binary semaphores).
        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = Vec::new();
        let mut signal_values = Vec::new();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push(signal_semaphore);
            signal_values.push(!0);
        }

        // Drain any semaphores registered via `add_signal_semaphore`.
        // NOTE: this guard is held through `vkQueueSubmit` below.
        let mut guards = self.signal_semaphores.lock();
        let (ref mut pending_signal_semaphores, ref mut pending_signal_semaphore_values) =
            guards.deref_mut();
        assert!(pending_signal_semaphores.len() == pending_signal_semaphore_values.len());
        if !pending_signal_semaphores.is_empty() {
            signal_semaphores.append(pending_signal_semaphores);
            signal_values.append(pending_signal_semaphore_values);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push(semaphore_state.signal);
        signal_values.push(!0);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push(raw);
                signal_values.push(signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if we have one; otherwise create a
                // fresh one. It is recorded in `active` under `signal_value`.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks)
            .signal_semaphores(&signal_semaphores);

        let mut vk_timeline_info;

        // Timeline values are only meaningful (and only valid to chain in)
        // when the device supports timeline semaphores.
        if self.device.private_caps.timeline_semaphores {
            vk_timeline_info =
                vk::TimelineSemaphoreSubmitInfo::default().signal_semaphore_values(&signal_values);
            vk_info = vk_info.push_next(&mut vk_timeline_info);
        }

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Present `texture` to `surface`, waiting on the texture's
    /// present-wait semaphores.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    /// Returns the device's timestamp period (nanoseconds per tick).
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
1369
1370impl Queue {
1371    pub fn raw_device(&self) -> &ash::Device {
1372        &self.device.raw
1373    }
1374
1375    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1376        let mut guards = self.signal_semaphores.lock();
1377        let (ref mut semaphores, ref mut semaphore_values) = guards.deref_mut();
1378        semaphores.push(semaphore);
1379        semaphore_values.push(semaphore_value.unwrap_or(!0));
1380    }
1381}
1382
1383/// Maps
1384///
1385/// - VK_ERROR_OUT_OF_HOST_MEMORY
1386/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1387fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1388    match err {
1389        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1390            get_oom_err(err)
1391        }
1392        e => get_unexpected_err(e),
1393    }
1394}
1395
1396/// Maps
1397///
1398/// - VK_ERROR_OUT_OF_HOST_MEMORY
1399/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1400/// - VK_ERROR_DEVICE_LOST
1401fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1402    match err {
1403        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1404        other => map_host_device_oom_err(other),
1405    }
1406}
1407
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// Since the IOCA code cannot actually occur, this defers to
/// [`map_host_device_oom_err`].
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1418
1419/// Maps
1420///
1421/// - VK_ERROR_OUT_OF_HOST_MEMORY
1422fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1423    match err {
1424        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1425        e => get_unexpected_err(e),
1426    }
1427}
1428
1429/// Maps
1430///
1431/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1432fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1433    match err {
1434        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1435        e => get_unexpected_err(e),
1436    }
1437}
1438
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// Since the IOCA code cannot actually occur, this defers to
/// [`map_host_oom_err`].
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1448
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// Only the two OOM codes can actually occur, so this defers to
/// [`map_host_device_oom_err`].
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1462
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled, the `panic!` above diverges and the
    // expression below is unreachable — hence the `allow`.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1472
/// Returns [`crate::DeviceError::OutOfMemory`] or panics if the `oom_panic`
/// feature flag is enabled.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "oom_panic")]
    panic!("Out of memory ({_err:?})");

    // With the feature enabled, the `panic!` above diverges and the
    // expression below is unreachable — hence the `allow`.
    #[allow(unreachable_code)]
    crate::DeviceError::OutOfMemory
}
1482
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled, the `panic!` above diverges and the
    // expression below is unreachable — hence the `allow`.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1492
/// Plain-bytes representation of a top-level acceleration structure instance.
///
/// NOTE(review): the field names and `#[repr(C)]` layout appear to mirror
/// Vulkan's `VkAccelerationStructureInstanceKHR` — confirm against the spec
/// before relying on this for direct GPU uploads.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    // Presumably a 3x4 transform matrix as in the Vulkan spec — confirm.
    transform: [f32; 12],
    // Packed instance custom index and visibility mask — bit split per the
    // Vulkan spec; confirm at the write site.
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}