wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
7We cope with this requirement by the combination of the following ways:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they reference gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Maximum number of attachments a render pass can reference: every color
/// attachment may carry a resolve attachment (hence `* 2`), plus one
/// depth/stencil attachment.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
53
/// Marker type implementing [`crate::Api`] for the Vulkan backend.
#[derive(Clone, Debug)]
pub struct Api;
56
// Associates every wgpu-hal API type with its Vulkan backend implementation.
impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
86
// Implement the type-erased `Dyn*` resource traits for every Vulkan backend
// type (the `impl_dyn_resource!` macro is defined in the crate root).
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
111
/// State for the `VK_EXT_debug_utils` messenger installed on the instance.
struct DebugUtils {
    /// Loaded function table for the debug-utils instance extension.
    extension: ext::debug_utils::Instance,
    /// The registered messenger handle, destroyed by `InstanceShared::drop`.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
124
/// Parameters for creating the [`DebugUtils`] messenger at instance creation.
pub struct DebugUtilsCreateInfo {
    /// Message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback; see [`DebugUtilsMessengerUserData`].
    callback_data: Box<DebugUtilsMessengerUserData>,
}
130
#[derive(Debug)]
/// Properties of the enabled validation layer, needed by the
/// `DebugUtilsMessenger` callback for its layer-specific workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
141
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer; `None` when the
    /// validation layer is not present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
155
/// Instance state shared between the [`Instance`] and everything created
/// from it (surfaces, adapters, and their children).
pub struct InstanceShared {
    /// The raw `ash` instance handle.
    raw: ash::Instance,
    /// Names of the instance extensions enabled at creation.
    extensions: Vec<&'static CStr>,
    /// When present, indicates the raw instance is externally owned and
    /// should not be destroyed by us — mirrors `DeviceShared::drop`'s check.
    drop_guard: Option<crate::DropGuard>,
    /// The instance flags requested at creation (debug/validation toggles).
    flags: wgt::InstanceFlags,
    /// Thresholds for reporting memory-budget pressure.
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, when debug-utils is enabled.
    debug_utils: Option<DebugUtils>,
    /// Loaded `VK_KHR_get_physical_device_properties2` functions, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    /// The `ash` entry-point loader the instance was created from.
    entry: ash::Entry,
    /// NOTE(review): presumably true when the NVIDIA Optimus layer/driver
    /// arrangement was detected — confirm in `instance.rs`.
    has_nv_optimus: bool,
    /// Android SDK version detected at instance creation — assumed 0 when
    /// not on Android; TODO confirm.
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
175
/// The Vulkan [`crate::Api`] instance type: a shared handle to [`InstanceShared`].
pub struct Instance {
    shared: Arc<InstanceShared>,
}
179
/// Semaphore used to acquire a swapchain image.
///
/// One of these exists per acquire slot; see [`Swapchain::acquire_semaphores`].
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
228
229impl SwapchainAcquireSemaphore {
230    fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
231        Ok(Self {
232            acquire: device
233                .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
234            should_wait_for_acquire: true,
235            previously_used_submission_index: 0,
236        })
237    }
238
239    /// Sets the fence value which the next acquire will wait for. This prevents
240    /// the semaphore from being used while the previous submission is still in flight.
241    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
242        self.previously_used_submission_index = value;
243    }
244
245    /// Return the semaphore that commands drawing to this image should wait for, if any.
246    ///
247    /// This only returns `Some` once per acquisition; see
248    /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
249    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
250        if self.should_wait_for_acquire {
251            self.should_wait_for_acquire = false;
252            Some(self.acquire)
253        } else {
254            None
255        }
256    }
257
258    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
259    /// so reset internal state to be ready for the next frame.
260    fn end_semaphore_usage(&mut self) {
261        // Reset the acquire semaphore, so that the next time we acquire this
262        // image, we can wait for it again.
263        self.should_wait_for_acquire = true;
264    }
265
266    unsafe fn destroy(&self, device: &ash::Device) {
267        unsafe {
268            device.destroy_semaphore(self.acquire, None);
269        }
270    }
271}
272
/// Per-swapchain-image pool of semaphores ordering drawing before presentation.
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    frame_index: usize,
}
319
320impl SwapchainPresentSemaphores {
321    pub fn new(frame_index: usize) -> Self {
322        Self {
323            present: Vec::new(),
324            present_index: 0,
325            frame_index,
326        }
327    }
328
329    /// Return the semaphore that the next submission that writes to this image should
330    /// signal when it's done.
331    ///
332    /// See [`SwapchainPresentSemaphores::present`] for details.
333    fn get_submit_signal_semaphore(
334        &mut self,
335        device: &DeviceShared,
336    ) -> Result<vk::Semaphore, crate::DeviceError> {
337        // Try to recycle a semaphore we created for a previous presentation.
338        let sem = match self.present.get(self.present_index) {
339            Some(sem) => *sem,
340            None => {
341                let sem = device.new_binary_semaphore(&format!(
342                    "SwapchainImageSemaphore: Image {} present semaphore {}",
343                    self.frame_index, self.present_index
344                ))?;
345                self.present.push(sem);
346                sem
347            }
348        };
349
350        self.present_index += 1;
351
352        Ok(sem)
353    }
354
355    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
356    /// so reset internal state to be ready for the next frame.
357    fn end_semaphore_usage(&mut self) {
358        // Reset the index to 0, so that the next time we get a semaphore, we
359        // start from the beginning of the list.
360        self.present_index = 0;
361    }
362
363    /// Return the semaphores that a presentation of this image should wait on.
364    ///
365    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
366    /// ends this image's acquisition should wait for. See
367    /// [`SwapchainPresentSemaphores::present`] for details.
368    ///
369    /// Reset `self` to be ready for the next acquisition cycle.
370    ///
371    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
372    fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
373        self.present[0..self.present_index].to_vec()
374    }
375
376    unsafe fn destroy(&self, device: &ash::Device) {
377        unsafe {
378            for sem in &self.present {
379                device.destroy_semaphore(*sem, None);
380            }
381        }
382    }
383}
384
/// The Vulkan swapchain plus the per-image synchronization state we keep for it.
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Loaded function table for the `khr::swapchain` device extension.
    functor: khr::swapchain::Device,
    /// The device this swapchain was created on.
    device: Arc<DeviceShared>,
    /// The images owned by this swapchain.
    images: Vec<vk::Image>,
    /// Fence used to wait on the acquired image.
    fence: vk::Fence,
    /// The surface configuration this swapchain was created with.
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
439
440impl Swapchain {
441    /// Mark the current frame finished, advancing to the next acquire semaphore.
442    fn advance_acquire_semaphore(&mut self) {
443        let semaphore_count = self.acquire_semaphores.len();
444        self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
445    }
446
447    /// Get the next acquire semaphore that should be used with this swapchain.
448    fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
449        self.acquire_semaphores[self.next_acquire_index].clone()
450    }
451
452    /// Get the set of present semaphores that should be used with the given image index.
453    fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
454        self.present_semaphores[index as usize].clone()
455    }
456}
457
/// The Vulkan [`crate::Api`] surface type.
pub struct Surface {
    /// The raw surface handle.
    raw: vk::SurfaceKHR,
    /// Loaded function table for the `khr::surface` instance extension.
    functor: khr::surface::Instance,
    /// The instance this surface was created on.
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` until the surface is configured.
    swapchain: RwLock<Option<Swapchain>>,
}
464
465impl Surface {
466    pub unsafe fn raw_handle(&self) -> vk::SurfaceKHR {
467        self.raw
468    }
469
470    /// Get the raw Vulkan swapchain associated with this surface.
471    ///
472    /// Returns [`None`] if the surface is not configured.
473    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
474        let read = self.swapchain.read();
475        read.as_ref().map(|it| it.raw)
476    }
477
478    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
479    /// using [VK_GOOGLE_display_timing].
480    ///
481    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
482    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
483    ///
484    /// This can also be used to add a "not before" timestamp to the presentation.
485    ///
486    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
487    ///
488    /// # Panics
489    ///
490    /// - If the surface hasn't been configured.
491    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
492    ///
493    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
494    #[track_caller]
495    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
496        let mut swapchain = self.swapchain.write();
497        let swapchain = swapchain
498            .as_mut()
499            .expect("Surface should have been configured");
500        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
501        if swapchain.device.features.contains(features) {
502            swapchain.next_present_time = Some(present_timing);
503        } else {
504            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
505            panic!(
506                concat!(
507                    "Tried to set display timing properties ",
508                    "without the corresponding feature ({:?}) enabled."
509                ),
510                features
511            );
512        }
513    }
514}
515
/// A texture acquired from a [`Surface`]'s swapchain, together with the
/// synchronization state needed to submit work against it and present it.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// The swapchain image index this texture corresponds to.
    index: u32,
    /// The wrapped swapchain image.
    texture: Texture,
    /// Acquire-side synchronization; see [`SwapchainAcquireSemaphore`].
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Present-side synchronization; see [`SwapchainPresentSemaphores`].
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
523
// Marker impl allowing `SurfaceTexture` to be used through the dyn HAL API.
impl crate::DynSurfaceTexture for SurfaceTexture {}
525
// Lets callers treat a surface texture as the texture it wraps.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}
531
// Same as above, but for the type-erased texture trait object.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
537
/// The Vulkan [`crate::Api`] adapter type, wrapping a physical device.
pub struct Adapter {
    /// The raw physical device handle.
    raw: vk::PhysicalDevice,
    /// The instance this adapter was enumerated from.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// Memory property flags recognized by this backend — NOTE(review):
    /// presumably used to filter reported memory types; confirm in `adapter.rs`.
    known_memory_flags: vk::MemoryPropertyFlags,
    /// Properties reported by the physical device.
    phd_capabilities: adapter::PhysicalDeviceProperties,
    /// Features reported by the physical device.
    phd_features: PhysicalDeviceFeatures,
    /// Downlevel capability flags exposed for this adapter.
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds in effect; see [`Workarounds`].
    workarounds: Workarounds,
}
549
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// How a device extension's entry points are reached.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
557
/// Loaded function tables for the optional device extensions we may use.
/// Each field is `None` when the corresponding extension is unavailable.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
565
/// Function tables for the ray-tracing family of device extensions.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
570
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// NOTE(review): presumably whether `VkImageViewUsageCreateInfo` can be
    /// used to restrict per-view usage — confirm in `device.rs`.
    image_view_usage: bool,
    /// True if timeline semaphores are available; when set, they back
    /// `hal::Fence` 1:1 (see the module docs on fences).
    timeline_semaphores: bool,
    /// Depth/stencil format support flags — presumably derived from
    /// `vkGetPhysicalDeviceFormatProperties`; confirm in `adapter.rs`.
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    /// Alignment mask applied to non-coherent memory maps — assumed to be
    /// `nonCoherentAtomSize - 1`; TODO confirm.
    non_coherent_map_mask: wgt::BufferAddress,
    /// True if multiple indirect draws per call are supported — presumably
    /// the `multiDrawIndirect` feature; confirm.
    multi_draw_indirect: bool,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// The image analogue of [`Self::robust_buffer_access`] — NOTE(review):
    /// confirm whether this maps to core `robustImageAccess` or the EXT form.
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    robust_buffer_access2: bool,

    /// The image analogue of [`Self::robust_buffer_access2`].
    robust_image_access2: bool,
    /// True if workgroup memory may be zero-initialized by the driver —
    /// presumably `VK_KHR_zero_initialize_workgroup_memory`; confirm.
    zero_initialize_workgroup_memory: bool,
    /// True if `VK_KHR_image_format_list` (promoted to 1.2) is available —
    /// TODO confirm.
    image_format_list: bool,
    /// Maximum number of sampler objects — presumably
    /// `maxSamplerAllocationCount`; confirm against `sampler.rs`.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
642
bitflags::bitflags!(
    /// Driver workaround flags, set per adapter (see [`Adapter::workarounds`]).
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
676
/// Hashable description of one render-pass attachment, used as part of
/// [`RenderPassKey`] in the device's render-pass cache.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
683
684impl AttachmentKey {
685    /// Returns an attachment key for a compatible attachment.
686    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
687        Self {
688            format,
689            layout,
690            ops: crate::AttachmentOps::all(),
691        }
692    }
693}
694
/// Cache key for a color attachment and its optional resolve attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}
700
/// Cache key for a depth/stencil attachment, with separate stencil ops.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}
706
/// Hashable key identifying a cached render pass; see
/// [`DeviceShared::render_passes`].
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// One entry per color attachment slot.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    /// The depth/stencil attachment, if any.
    depth_stencil: Option<DepthStencilAttachmentKey>,
    /// Sample count shared by the attachments.
    sample_count: u32,
    /// View count when multiview rendering is enabled.
    multiview: Option<NonZeroU32>,
}
714
/// Device state shared between the [`Device`], its queue, and the resources
/// created from it.
struct DeviceShared {
    /// The raw `ash` device handle.
    raw: ash::Device,
    /// Index of the queue family that [`Self::raw_queue`] belongs to.
    family_index: u32,
    /// Index of [`Self::raw_queue`] within its family.
    queue_index: u32,
    /// The raw queue used for submissions.
    raw_queue: vk::Queue,
    /// When present, the raw device is externally owned and is not destroyed
    /// in `Drop` (see the `Drop` impl below).
    drop_guard: Option<crate::DropGuard>,
    /// The instance this device was created from.
    instance: Arc<InstanceShared>,
    /// The physical device this device was created on.
    physical_device: vk::PhysicalDevice,
    /// Names of the device extensions enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    /// Loaded optional extension function tables.
    extension_fns: DeviceExtensionFunctions,
    /// The adapter's vendor ID.
    vendor_id: u32,
    /// Key used to validate pipeline cache contents — NOTE(review): confirm
    /// derivation and use in `device.rs`.
    pipeline_cache_validation_key: [u8; 16],
    /// Timestamp tick period — assumed to be `limits.timestampPeriod`
    /// (nanoseconds per tick); TODO confirm.
    timestamp_period: f32,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds in effect; see [`Workarounds`].
    workarounds: Workarounds,
    /// The wgpu features enabled on this device.
    features: wgt::Features,
    /// Cache of render passes, kept for the device's lifetime and destroyed
    /// in `Drop` (see the module docs on render passes).
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    /// Cache of sampler objects.
    sampler_cache: Mutex<sampler::SamplerCache>,
    /// Counter of live device memory allocations.
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted from until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
743
744impl Drop for DeviceShared {
745    fn drop(&mut self) {
746        for &raw in self.render_passes.lock().values() {
747            unsafe { self.raw.destroy_render_pass(raw, None) };
748        }
749        if self.drop_guard.is_none() {
750            unsafe { self.raw.destroy_device(None) };
751        }
752    }
753}
754
/// The Vulkan [`crate::Api`] device type.
pub struct Device {
    /// State shared with the queue and all child resources.
    shared: Arc<DeviceShared>,
    /// Suballocator for device memory.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor pools and sets.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// Bitmask of memory type indices valid for allocation — NOTE(review):
    /// confirm exact semantics in `device.rs`.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend when compiling shader modules.
    naga_options: naga::back::spv::Options<'static>,
    /// RenderDoc API hook, when the `renderdoc` feature is enabled.
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    /// Internal HAL counters, shared with the owner of this device.
    counters: Arc<wgt::HalCounters>,
}
766
767impl Drop for Device {
768    fn drop(&mut self) {
769        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
770        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
771    }
772}
773
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` is needed so `advance` can snapshot the current pair before
// rotating the semaphores.
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
807
impl RelaySemaphores {
    /// Creates the initial relay state: no wait semaphore (the first
    /// submission waits on nothing), and a fresh binary semaphore for the
    /// first submission to signal.
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                // This alternates the two semaphores (see the type docs for
                // why two are needed).
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    ///
    /// # Safety
    ///
    /// The semaphores must not be in use by any pending GPU work.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}
848
/// The Vulkan [`crate::Api::Queue`] implementation.
pub struct Queue {
    /// The raw Vulkan queue handle.
    raw: vk::Queue,
    /// Swapchain device functions, used by `present`.
    swapchain_fn: khr::swapchain::Device,
    /// The device this queue was created from.
    device: Arc<DeviceShared>,
    /// Index of the queue family `raw` belongs to.
    family_index: u32,
    /// Semaphores forcing submissions to execute in order; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores to be signalled by the next `submit`, registered via
    /// [`Queue::add_signal_semaphore`].
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        // Only the relay semaphores are owned by the queue; semaphores added
        // through `add_signal_semaphore` are not destroyed here.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
/// Where a [`Buffer`]'s memory comes from.
#[derive(Debug)]
enum BufferMemoryBacking {
    /// Memory owned and suballocated by our `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A caller-provided range of raw Vulkan memory
    /// (see [`Buffer::from_raw_managed`]).
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}
impl BufferMemoryBacking {
    /// The `VkDeviceMemory` backing the buffer.
    fn memory(&self) -> &vk::DeviceMemory {
        match self {
            Self::Managed(m) => m.memory(),
            Self::VulkanMemory { memory, .. } => memory,
        }
    }
    /// Byte offset of the buffer's range within `memory`.
    fn offset(&self) -> u64 {
        match self {
            Self::Managed(m) => m.offset(),
            Self::VulkanMemory { offset, .. } => *offset,
        }
    }
    /// Size in bytes of the buffer's range within `memory`.
    fn size(&self) -> u64 {
        match self {
            Self::Managed(m) => m.size(),
            Self::VulkanMemory { size, .. } => *size,
        }
    }
}
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Backing memory; `None` for buffers whose memory is fully managed by
    /// the caller (see [`Buffer::from_raw`]).
    block: Option<Mutex<BufferMemoryBacking>>,
}
impl Buffer {
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            block: None,
        }
    }
    /// # Safety
    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid with the allocation of `memory`
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}
936
/// A ray-tracing acceleration structure and the buffer/memory backing it.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    /// The buffer the acceleration structure is stored in.
    buffer: vk::Buffer,
    /// Memory block backing `buffer`.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Query pool for reading back the compacted size, if compaction is used.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
946
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// When present, the image handle is not destroyed by us — presumably it
    /// is owned externally (e.g. a swapchain or imported image); confirm at
    /// construction sites.
    drop_guard: Option<crate::DropGuard>,
    /// Imported external memory backing the image, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory block when the image is allocated by our own allocator.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Process-unique identity for this image, since Vulkan may reuse
    /// handles; see [`ResourceIdentityFactory`].
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// # Safety
    ///
    /// - The external memory must not be manually freed
    pub unsafe fn external_memory(&self) -> Option<vk::DeviceMemory> {
        self.external_memory
    }
}
975
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the parent image; see [`ResourceIdentityFactory`].
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of this view, used in framebuffer cache keys.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}
1007
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    /// The create info this sampler was built from, retained for later reuse
    /// or comparison — confirm usage in `sampler.rs`.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
1015
/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// The (possibly remapped) binding index.
    binding: u32,
    /// Size of the binding array, if this binding is an array.
    binding_array_size: Option<NonZeroU32>,
}
1024
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts, used when allocating descriptor sets.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    /// Whether any entry in this layout is a binding array.
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
1038
#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding remapping passed to naga's SPIR-V backend when compiling
    /// shaders against this layout.
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1046
/// A descriptor set allocated from the device's descriptor allocator.
#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1053
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Reusable byte buffer for building NUL-terminated debug-marker names.
    marker: Vec<u8>,
    /// Reusable scratch storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Reusable scratch storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
1061
1062impl Temp {
1063    fn clear(&mut self) {
1064        self.marker.clear();
1065        self.buffer_barriers.clear();
1066        self.image_barriers.clear();
1067    }
1068
1069    fn make_c_str(&mut self, name: &str) -> &CStr {
1070        self.marker.clear();
1071        self.marker.extend_from_slice(name.as_bytes());
1072        self.marker.push(0);
1073        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1074    }
1075}
1076
/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
struct ResourceIdentityFactory<T> {
    // On targets without 64-bit atomics, fall back to a mutex-guarded counter.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the factory to a single resource type without storing a `T`.
    _phantom: PhantomData<T>,
}
1088
impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID will be 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        // `Relaxed` is sufficient: each `fetch_add` yields a distinct value,
        // and uniqueness is the only property we need — no ordering with any
        // other memory operation is required.
        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}
1122
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
///
/// Equality and hashing are effectively by `id` only; `_phantom` is a
/// zero-sized type tag that keys identities to a resource type at compile time.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
1132
/// Hash-map key identifying a cached framebuffer.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    /// Appends `view` to both the identity list (hashed) and the raw-handle
    /// list (used when actually creating the framebuffer).
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}
1153
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    /// Process-unique identity of `raw`; see [`ResourceIdentityFactory`].
    identity: ResourceIdentity<vk::ImageView>,
}
1160
/// Hash-map key for temporary texture views cached on a [`CommandEncoder`].
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
1172
/// The Vulkan [`crate::Api::CommandEncoder`] implementation: a command pool
/// plus the command buffers and caches associated with it.
pub struct CommandEncoder {
    /// The command pool all of this encoder's command buffers come from.
    raw: vk::CommandPool,
    /// The device this encoder was created from.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers cached by this encoder, destroyed when it is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views cached by this encoder, destroyed when it is
    /// dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Internal HAL usage counters; decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
1214
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        // Cached framebuffers and temporary views are owned by this encoder,
        // so this is where they are destroyed.
        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}
1245
impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1254
impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Deliberately terse: only the raw pool handle is included.
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}
1262
/// A recorded Vulkan command buffer, produced by a [`CommandEncoder`].
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1269
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// A pre-built `VkShaderModule` (e.g. from user-supplied SPIR-V).
    Raw(vk::ShaderModule),
    /// Unbuilt naga IR — presumably compiled to SPIR-V at pipeline creation
    /// time; confirm in `device.rs`.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1281
/// A Vulkan graphics pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1288
/// A Vulkan compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1295
/// A Vulkan pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1302
/// A Vulkan query pool.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1309
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1360
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                // Use the extension entry points or the promoted core ones,
                // whichever the device was set up with.
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                // Move every signalled fence (value <= latest) to the free list.
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    // Reset only the fences we just recycled, returning them
                    // to the unsignaled state for reuse.
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1454
1455impl crate::Queue for Queue {
1456    type A = Api;
1457
1458    unsafe fn submit(
1459        &self,
1460        command_buffers: &[&CommandBuffer],
1461        surface_textures: &[&SurfaceTexture],
1462        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
1463    ) -> Result<(), crate::DeviceError> {
1464        let mut fence_raw = vk::Fence::null();
1465
1466        let mut wait_stage_masks = Vec::new();
1467        let mut wait_semaphores = Vec::new();
1468        let mut signal_semaphores = SemaphoreList::default();
1469
1470        // Double check that the same swapchain image isn't being given to us multiple times,
1471        // as that will deadlock when we try to lock them all.
1472        debug_assert!(
1473            {
1474                let mut check = HashSet::with_capacity(surface_textures.len());
1475                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
1476                for st in surface_textures {
1477                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
1478                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
1479                }
1480                check.len() == surface_textures.len() * 2
1481            },
1482            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
1483        );
1484
1485        let locked_swapchain_semaphores = surface_textures
1486            .iter()
1487            .map(|st| {
1488                let acquire = st
1489                    .acquire_semaphores
1490                    .try_lock()
1491                    .expect("Failed to lock surface acquire semaphore");
1492                let present = st
1493                    .present_semaphores
1494                    .try_lock()
1495                    .expect("Failed to lock surface present semaphore");
1496
1497                (acquire, present)
1498            })
1499            .collect::<Vec<_>>();
1500
1501        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
1502            acquire_semaphore.set_used_fence_value(signal_value);
1503
1504            // If we're the first submission to operate on this image, wait on
1505            // its acquire semaphore, to make sure the presentation engine is
1506            // done with it.
1507            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
1508                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1509                wait_semaphores.push(sem);
1510            }
1511
1512            // Get a semaphore to signal when we're done writing to this surface
1513            // image. Presentation of this image will wait for this.
1514            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
1515            signal_semaphores.push_binary(signal_semaphore);
1516        }
1517
1518        let mut guard = self.signal_semaphores.lock();
1519        if !guard.is_empty() {
1520            signal_semaphores.append(&mut guard);
1521        }
1522
1523        // In order for submissions to be strictly ordered, we encode a dependency between each submission
1524        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
1525        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
1526
1527        if let Some(sem) = semaphore_state.wait {
1528            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1529            wait_semaphores.push(sem);
1530        }
1531
1532        signal_semaphores.push_binary(semaphore_state.signal);
1533
1534        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
1535        signal_fence.maintain(&self.device.raw)?;
1536        match *signal_fence {
1537            Fence::TimelineSemaphore(raw) => {
1538                signal_semaphores.push_timeline(raw, signal_value);
1539            }
1540            Fence::FencePool {
1541                ref mut active,
1542                ref mut free,
1543                ..
1544            } => {
1545                fence_raw = match free.pop() {
1546                    Some(raw) => raw,
1547                    None => unsafe {
1548                        self.device
1549                            .raw
1550                            .create_fence(&vk::FenceCreateInfo::default(), None)
1551                            .map_err(map_host_device_oom_err)?
1552                    },
1553                };
1554                active.push((signal_value, fence_raw));
1555            }
1556        }
1557
1558        let vk_cmd_buffers = command_buffers
1559            .iter()
1560            .map(|cmd| cmd.raw)
1561            .collect::<Vec<_>>();
1562
1563        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
1564
1565        vk_info = vk_info
1566            .wait_semaphores(&wait_semaphores)
1567            .wait_dst_stage_mask(&wait_stage_masks);
1568
1569        let mut vk_timeline_info = mem::MaybeUninit::uninit();
1570        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);
1571
1572        profiling::scope!("vkQueueSubmit");
1573        unsafe {
1574            self.device
1575                .raw
1576                .queue_submit(self.raw, &[vk_info], fence_raw)
1577                .map_err(map_host_device_oom_and_lost_err)?
1578        };
1579        Ok(())
1580    }
1581
1582    unsafe fn present(
1583        &self,
1584        surface: &Surface,
1585        texture: SurfaceTexture,
1586    ) -> Result<(), crate::SurfaceError> {
1587        let mut swapchain = surface.swapchain.write();
1588        let ssc = swapchain.as_mut().unwrap();
1589        let mut acquire_semaphore = texture.acquire_semaphores.lock();
1590        let mut present_semaphores = texture.present_semaphores.lock();
1591
1592        let wait_semaphores = present_semaphores.get_present_wait_semaphores();
1593
1594        // Reset the acquire and present semaphores internal state
1595        // to be ready for the next frame.
1596        //
1597        // We do this before the actual call to present to ensure that
1598        // even if this method errors and early outs, we have reset
1599        // the state for next frame.
1600        acquire_semaphore.end_semaphore_usage();
1601        present_semaphores.end_semaphore_usage();
1602
1603        drop(acquire_semaphore);
1604
1605        let swapchains = [ssc.raw];
1606        let image_indices = [texture.index];
1607        let vk_info = vk::PresentInfoKHR::default()
1608            .swapchains(&swapchains)
1609            .image_indices(&image_indices)
1610            .wait_semaphores(&wait_semaphores);
1611
1612        let mut display_timing;
1613        let present_times;
1614        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
1615            debug_assert!(
1616                ssc.device
1617                    .features
1618                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
1619                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
1620            );
1621            present_times = [present_time];
1622            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
1623            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
1624            vk_info.push_next(&mut display_timing)
1625        } else {
1626            vk_info
1627        };
1628
1629        let suboptimal = {
1630            profiling::scope!("vkQueuePresentKHR");
1631            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
1632                match error {
1633                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
1634                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
1635                    // We don't use VK_EXT_full_screen_exclusive
1636                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
1637                    _ => map_host_device_oom_and_lost_err(error).into(),
1638                }
1639            })?
1640        };
1641        if suboptimal {
1642            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
1643            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
1644            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
1645            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
1646            #[cfg(not(target_os = "android"))]
1647            log::warn!("Suboptimal present of frame {}", texture.index);
1648        }
1649        Ok(())
1650    }
1651
    unsafe fn get_timestamp_period(&self) -> f32 {
        // Number of nanoseconds per timestamp-query tick, cached on the
        // device (presumably `VkPhysicalDeviceLimits::timestampPeriod`;
        // set at device creation — confirm at the assignment site).
        self.device.timestamp_period
    }
1655}
1656
1657impl Queue {
1658    pub fn raw_device(&self) -> &ash::Device {
1659        &self.device.raw
1660    }
1661
1662    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1663        let mut guard = self.signal_semaphores.lock();
1664        if let Some(value) = semaphore_value {
1665            guard.push_timeline(semaphore, value);
1666        } else {
1667            guard.push_binary(semaphore);
1668        }
1669    }
1670}
1671
1672/// Maps
1673///
1674/// - VK_ERROR_OUT_OF_HOST_MEMORY
1675/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1676fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1677    match err {
1678        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1679            get_oom_err(err)
1680        }
1681        e => get_unexpected_err(e),
1682    }
1683}
1684
1685/// Maps
1686///
1687/// - VK_ERROR_OUT_OF_HOST_MEMORY
1688/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1689/// - VK_ERROR_DEVICE_LOST
1690fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1691    match err {
1692        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1693        other => map_host_device_oom_err(other),
1694    }
1695}
1696
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR cannot actually occur;
    // only the OOM cases remain, so forward to the plain OOM mapping.
    map_host_device_oom_err(err)
}
1707
1708/// Maps
1709///
1710/// - VK_ERROR_OUT_OF_HOST_MEMORY
1711fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1712    match err {
1713        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1714        e => get_unexpected_err(e),
1715    }
1716}
1717
1718/// Maps
1719///
1720/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1721fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1722    match err {
1723        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1724        e => get_unexpected_err(e),
1725    }
1726}
1727
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR cannot actually occur;
    // only host OOM remains, so forward to the host-OOM mapping.
    map_host_oom_err(err)
}
1737
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// Used for pipeline-creation entry points.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control, so
    // VK_PIPELINE_COMPILE_REQUIRED_EXT cannot occur.
    // We don't use VK_NV_glsl_shader, so
    // VK_ERROR_INVALID_SHADER_NV cannot occur.
    // Only the OOM cases remain, so forward to the OOM mapping.
    map_host_device_oom_err(err)
}
1751
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    // The leading underscore keeps the parameter "unused" when the
    // feature is disabled; with it enabled, the raw Vulkan result code
    // goes into the panic message for debugging.
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // Unreachable only when the panic above is compiled in.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1761
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The error code is taken by value (though currently unused) so all the
/// `get_*_err` helpers share a uniform call shape.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1766
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    // Opt-in hard failure for debugging device-loss scenarios.
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // Unreachable only when the panic above is compiled in.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1776
/// CPU-side mirror of Vulkan's `VkAccelerationStructureInstanceKHR`
/// (field meanings per that spec struct — layout must match it exactly).
///
/// `Pod`/`Zeroable` let instances be byte-copied into GPU buffers.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Row-major 3x4 transform matrix (`VkTransformMatrixKHR`).
    transform: [f32; 12],
    /// Low 24 bits: instance custom index; high 8 bits: visibility mask.
    custom_data_and_mask: u32,
    /// Low 24 bits: SBT record offset; high 8 bits: `VkGeometryInstanceFlagsKHR`.
    shader_binding_table_record_offset_and_flags: u32,
    /// Device address or handle of the referenced BLAS.
    acceleration_structure_reference: u64,
}
1785
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to those members will be overwritten
    /// (use [`Self::extensions`] and [`Self::device_features`] instead).
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1807
/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info's extension list will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to something the device does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1817
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten (use [`Self::extensions`] instead).
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1837
/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info's extension list will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to something the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;