wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in a combination of ways:
 - temporarily allocating `Vec` on the heap, where the overhead is permitted
 - growing temporary local storage

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they contain is removed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/
26
#![allow(clippy::std_instead_of_alloc, clippy::std_instead_of_core)]

mod adapter;
mod command;
mod conv;
mod device;
mod drm;
mod instance;
mod sampler;

use std::{
    borrow::Borrow,
    boxed::Box,
    ffi::{CStr, CString},
    fmt, mem,
    num::NonZeroU32,
    ops::DerefMut,
    sync::Arc,
    vec::Vec,
};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use bytemuck::{Pod, Zeroable};
use hashbrown::HashSet;
use parking_lot::{Mutex, RwLock};

use naga::FastHashMap;
use wgt::InternalCounter;

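/// Nanoseconds per millisecond, for converting millisecond timeouts to nanoseconds.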
const MILLIS_TO_NANOS: u64 = 1_000_000;
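/// Each color attachment may bring a resolve attachment (hence the `* 2`),
/// plus one depth/stencil attachment.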
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

#[derive(Debug)]
/// Properties of the validation layer that the `DebugUtilsMessenger`
/// needs in order to apply layer-specific workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

146/// User data needed by `instance::debug_utils_messenger_callback`.
147///
148/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
149/// pointer refers to one of these values.
150#[derive(Debug)]
151pub struct DebugUtilsMessengerUserData {
152 /// The properties related to the validation layer, if present
153 validation_layer_properties: Option<ValidationLayerProperties>,
154
155 /// If the OBS layer is present. OBS never increments the version of their layer,
156 /// so there's no reason to have the version.
157 has_obs_layer: bool,
158}
159
pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// That is, the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

183/// The semaphores needed to use one image in a swapchain.
184#[derive(Debug)]
185struct SwapchainImageSemaphores {
186 /// A semaphore that is signaled when this image is safe for us to modify.
187 ///
188 /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
189 /// image that we should use, that image may actually still be in use by the
190 /// presentation engine, and is not yet safe to modify. However, that
191 /// function does accept a semaphore that it will signal when the image is
192 /// indeed safe to begin messing with.
193 ///
194 /// This semaphore is:
195 ///
196 /// - waited for by the first queue submission to operate on this image
197 /// since it was acquired, and
198 ///
199 /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
200 /// for us to use.
201 ///
202 /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
203 acquire: vk::Semaphore,
204
205 /// True if the next command submission operating on this image should wait
206 /// for [`acquire`].
207 ///
208 /// We must wait for `acquire` before drawing to this swapchain image, but
209 /// because `wgpu-hal` queue submissions are always strongly ordered, only
210 /// the first submission that works with a swapchain image actually needs to
211 /// wait. We set this flag when this image is acquired, and clear it the
212 /// first time it's passed to [`Queue::submit`] as a surface texture.
213 ///
214 /// [`acquire`]: SwapchainImageSemaphores::acquire
215 /// [`Queue::submit`]: crate::Queue::submit
216 should_wait_for_acquire: bool,
217
218 /// A pool of semaphores for ordering presentation after drawing.
219 ///
220 /// The first [`present_index`] semaphores in this vector are:
221 ///
222 /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
223 /// image, and
224 ///
225 /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
226 /// this image, when the submission finishes execution.
227 ///
228 /// This vector accumulates one semaphore per submission that writes to this
229 /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
230 /// requires a semaphore to order it with respect to drawing commands, and
231 /// we can't attach new completion semaphores to a command submission after
232 /// it's been submitted. This means that, at submission time, we must create
233 /// the semaphore we might need if the caller's next action is to enqueue a
234 /// presentation of this image.
235 ///
236 /// An alternative strategy would be for presentation to enqueue an empty
237 /// submit, ordered relative to other submits in the usual way, and
238 /// signaling a single presentation semaphore. But we suspect that submits
239 /// are usually expensive enough, and semaphores usually cheap enough, that
240 /// performance-sensitive users will avoid making many submits, so that the
241 /// cost of accumulated semaphores will usually be less than the cost of an
242 /// additional submit.
243 ///
244 /// Only the first [`present_index`] semaphores in the vector are actually
245 /// going to be signalled by submitted commands, and need to be waited for
246 /// by the next present call. Any semaphores beyond that index were created
247 /// for prior presents and are simply being retained for recycling.
248 ///
249 /// [`present_index`]: SwapchainImageSemaphores::present_index
250 /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
251 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
252 present: Vec<vk::Semaphore>,
253
254 /// The number of semaphores in [`present`] to be signalled for this submission.
255 ///
256 /// [`present`]: SwapchainImageSemaphores::present
257 present_index: usize,
258
259 /// The fence value of the last command submission that wrote to this image.
260 ///
261 /// The next time we try to acquire this image, we'll block until
262 /// this submission finishes, proving that [`acquire`] is ready to
263 /// pass to `vkAcquireNextImageKHR` again.
264 ///
265 /// [`acquire`]: SwapchainImageSemaphores::acquire
266 previously_used_submission_index: crate::FenceValue,
267}
268
impl SwapchainImageSemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            acquire: device.new_binary_semaphore()?,
            should_wait_for_acquire: true,
            present: Vec::new(),
            present_index: 0,
            previously_used_submission_index: 0,
        })
    }

    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
        self.previously_used_submission_index = value;
    }

    /// Return the semaphore that commands drawing to this image should wait for, if any.
    ///
    /// This only returns `Some` once per acquisition; see
    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
        if self.should_wait_for_acquire {
            self.should_wait_for_acquire = false;
            Some(self.acquire)
        } else {
            None
        }
    }

    /// Return a semaphore that a submission that writes to this image should
    /// signal when it's done.
    ///
    /// See [`SwapchainImageSemaphores::present`] for details.
    fn get_submit_signal_semaphore(
        &mut self,
        device: &DeviceShared,
    ) -> Result<vk::Semaphore, crate::DeviceError> {
        // Try to recycle a semaphore we created for a previous presentation.
        let sem = match self.present.get(self.present_index) {
            Some(sem) => *sem,
            None => {
                let sem = device.new_binary_semaphore()?;
                self.present.push(sem);
                sem
            }
        };

        self.present_index += 1;

        Ok(sem)
    }

    /// Return the semaphores that a presentation of this image should wait on.
    ///
    /// Return a slice of semaphores that the call to [`vkQueuePresentKHR`] that
    /// ends this image's acquisition should wait for. See
    /// [`SwapchainImageSemaphores::present`] for details.
    ///
    /// Reset `self` to be ready for the next acquisition cycle.
    ///
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
        let old_index = self.present_index;

        // Since this marks the end of this acquire/draw/present cycle, take the
        // opportunity to reset `self` in preparation for the next acquisition.
        self.present_index = 0;
        self.should_wait_for_acquire = true;

        &self.present[0..old_index]
    }

    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            device.destroy_semaphore(self.acquire, None);
            for sem in &self.present {
                device.destroy_semaphore(*sem, None);
            }
        }
    }
}

struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to pass the semaphore as an
    /// argument to `acquire_next_image`, and it is that very call which tells
    /// us which image to use.
    next_semaphore_index: usize,
    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}

impl Swapchain {
    fn advance_surface_semaphores(&mut self) {
        let semaphore_count = self.surface_semaphores.len();
        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
    }

    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
        self.surface_semaphores[self.next_semaphore_index].clone()
    }
}

pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    swapchain: RwLock<Option<Swapchain>>,
}

impl Surface {
    /// Get the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured.
    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        read.as_ref().map(|it| it.raw)
    }

    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
    ///
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        let swapchain = swapchain
            .as_mut()
            .expect("Surface should have been configured");
        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
        if swapchain.device.features.contains(features) {
            swapchain.next_present_time = Some(present_timing);
        } else {
            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
            panic!(
                concat!(
                    "Tried to set display timing properties ",
                    "without the corresponding feature ({:?}) enabled."
                ),
                features
            );
        }
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified (the function pointers
// should all be the same), but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}

496/// Set of internal capabilities, which don't show up in the exposed
497/// device geometry, but affect the code paths taken internally.
498#[derive(Clone, Debug)]
499struct PrivateCapabilities {
500 /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height` or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport height does not require a Y shift.
501 ///
502 /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
503 flip_y_requires_shift: bool,
504 imageless_framebuffers: bool,
505 image_view_usage: bool,
506 timeline_semaphores: bool,
507 texture_d24: bool,
508 texture_d24_s8: bool,
509 texture_s8: bool,
510 /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
511 can_present: bool,
512 non_coherent_map_mask: wgt::BufferAddress,
513
514 /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
515 ///
516 /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
517 /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
518 /// a given bindgroup binding outside that binding's [accessible
519 /// region][ar]. Enabling `robustBufferAccess` does ensure that
520 /// out-of-bounds reads and writes are not undefined behavior (that's good),
521 /// but still permits out-of-bounds reads to return data from anywhere
522 /// within the buffer, not just the accessible region.
523 ///
524 /// [ar]: ../struct.BufferBinding.html#accessible-region
525 /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
526 robust_buffer_access: bool,
527
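    /// True if this adapter advertises the `robustImageAccess` feature
    /// (from `VK_EXT_image_robustness`), the image analogue of
    /// `robust_buffer_access` above.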
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

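    /// True if this adapter supports the `robustImageAccess2` feature of
    /// `VK_EXT_robustness2`, the image analogue of `robust_buffer_access2`.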
    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///   if copy_length >= 4096 {
        ///     if start_offset % 16 != 0 {
        ///       if copy_length == 4096 {
        ///         return true;
        ///       }
        ///       if copy_length % 16 == 0 {
        ///         return false;
        ///       }
        ///     }
        ///   }
        ///   true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: wgt::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    framebuffers: Mutex<FastHashMap<FramebufferKey, vk::Framebuffer>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        for &raw in self.framebuffers.lock().values() {
            unsafe { self.raw.destroy_framebuffer(raw, None) };
        }
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}

impl Drop for Device {
    fn drop(&mut self) {
        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}

693/// Semaphores for forcing queue submissions to run in order.
694///
695/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
696/// ordered, then the first submission will finish on the GPU before the second
697/// submission begins. To get this behavior on Vulkan we need to pass semaphores
698/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
699/// and to signal when their execution is done.
700///
701/// Normally this can be done with a single semaphore, waited on and then
702/// signalled for each submission. At any given time there's exactly one
703/// submission that would signal the semaphore, and exactly one waiting on it,
704/// as Vulkan requires.
705///
706/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
707/// hang if we use a single semaphore. The workaround is to alternate between
708/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
709/// the workaround until, say, Oct 2026.
710///
711/// [`wgpu_hal::Queue`]: crate::Queue
712/// [`submit`]: crate::Queue::submit
713/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
714/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
715#[derive(Clone)]
716struct RelaySemaphores {
717 /// The semaphore the next submission should wait on before beginning
718 /// execution on the GPU. This is `None` for the first submission, which
719 /// should not wait on anything at all.
720 wait: Option<vk::Semaphore>,
721
722 /// The semaphore the next submission should signal when it has finished
723 /// execution on the GPU.
724 signal: vk::Semaphore,
725}
726
impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore()?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore()?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<(Vec<vk::Semaphore>, Vec<u64>)>,
}

impl Queue {
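    /// Get the raw Vulkan queue handle.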
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    external_memory: Option<vk::DeviceMemory>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: wgt::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    attachment: FramebufferAttachment,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to array size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}

883/// Miscellaneous allocation recycling pool for `CommandAllocator`.
884#[derive(Default)]
885struct Temp {
886 marker: Vec<u8>,
887 buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
888 image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
889}
890
891impl Temp {
892 fn clear(&mut self) {
893 self.marker.clear();
894 self.buffer_barriers.clear();
895 self.image_barriers.clear();
896 }
897
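    /// Build a NUL-terminated C string in `self.marker`, reusing its
    /// allocation. The returned `CStr` borrows `self.marker`, so it is only
    /// valid until the next use of the buffer.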
    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    counters: Arc<wgt::HalCounters>,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }
        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
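    /// A shader module that has already been compiled to SPIR-V and handed to
    /// Vulkan.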
    Raw(vk::ShaderModule),
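    /// A shader kept in Naga IR form, so that SPIR-V generation (including
    /// runtime checks) can be deferred until pipeline creation.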
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

1031/// The [`Api::Fence`] type for [`vulkan::Api`].
1032///
1033/// This is an `enum` because there are two possible implementations of
1034/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
1035/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
1036/// require non-1.0 features.
1037///
1038/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
1039/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
1040/// otherwise.
1041///
1042/// [`Api::Fence`]: crate::Api::Fence
1043/// [`vulkan::Api`]: Api
1044/// [`Device::create_fence`]: crate::Device::create_fence
1045/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
1046/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
1047/// [`FencePool`]: Fence::FencePool
1048#[derive(Debug)]
1049pub enum Fence {
1050 /// A Vulkan [timeline semaphore].
1051 ///
1052 /// These are simpler to use than Vulkan fences, since timeline semaphores
1053 /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work.
1054 ///
1055 /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
1056 /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence
1057 TimelineSemaphore(vk::Semaphore),
1058
1059 /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
1060 ///
1061 /// The effective [`FenceValue`] of this variant is the greater of
1062 /// `last_completed` and the maximum value associated with a signalled fence
1063 /// in `active`.
1064 ///
1065 /// Fences are available in all versions of Vulkan, but since they only have
1066 /// two states, "signaled" and "unsignaled", we need to use a separate fence
1067 /// for each queue submission we might want to wait for, and remember which
1068 /// [`FenceValue`] each one represents.
1069 ///
1070 /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
1071 /// [`FenceValue`]: crate::FenceValue
1072 FencePool {
1073 last_completed: crate::FenceValue,
1074 /// The pending fence values have to be ascending.
1075 active: Vec<(crate::FenceValue, vk::Fence)>,
1076 free: Vec<vk::Fence>,
1077 },
1078}
1079
impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = Vec::new();
        let mut signal_values = Vec::new();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push(signal_semaphore);
            signal_values.push(!0);
        }

        let mut guards = self.signal_semaphores.lock();
        let (ref mut pending_signal_semaphores, ref mut pending_signal_semaphore_values) =
            guards.deref_mut();
        assert!(pending_signal_semaphores.len() == pending_signal_semaphore_values.len());
        if !pending_signal_semaphores.is_empty() {
            signal_semaphores.append(pending_signal_semaphores);
            signal_values.append(pending_signal_semaphore_values);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push(semaphore_state.signal);
        signal_values.push(!0);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push(raw);
                signal_values.push(signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks)
            .signal_semaphores(&signal_semaphores);

        let mut vk_timeline_info;

        if self.device.private_caps.timeline_semaphores {
            vk_timeline_info =
                vk::TimelineSemaphoreSubmitInfo::default().signal_semaphore_values(&signal_values);
            vk_info = vk_info.push_next(&mut vk_timeline_info);
        }

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e. `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl Queue {
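    /// Get the raw [`ash::Device`] backing this queue.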
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

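    /// Add a semaphore to be signalled by the next [`submit`](crate::Queue::submit) call.
    ///
    /// `semaphore_value` is only meaningful for timeline semaphores; for a
    /// binary semaphore, pass `None` (the stored value then defaults to `!0`
    /// and is ignored).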
    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guards = self.signal_semaphores.lock();
        let (ref mut semaphores, ref mut semaphore_values) = guards.deref_mut();
        semaphores.push(semaphore);
        semaphore_values.push(semaphore_value.unwrap_or(!0));
    }
}

1383/// Maps
1384///
1385/// - VK_ERROR_OUT_OF_HOST_MEMORY
1386/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1387fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1388 match err {
1389 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1390 get_oom_err(err)
1391 }
1392 e => get_unexpected_err(e),
1393 }
1394}
1395
1396/// Maps
1397///
1398/// - VK_ERROR_OUT_OF_HOST_MEMORY
1399/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1400/// - VK_ERROR_DEVICE_LOST
1401fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1402 match err {
1403 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1404 other => map_host_device_oom_err(other),
1405 }
1406}
1407
1408/// Maps
1409///
1410/// - VK_ERROR_OUT_OF_HOST_MEMORY
1411/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1412/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1413fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1414 // We don't use VK_KHR_buffer_device_address
1415 // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1416 map_host_device_oom_err(err)
1417}
1418
1419/// Maps
1420///
1421/// - VK_ERROR_OUT_OF_HOST_MEMORY
1422fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1423 match err {
1424 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1425 e => get_unexpected_err(e),
1426 }
1427}
1428
1429/// Maps
1430///
1431/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1432fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1433 match err {
1434 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1435 e => get_unexpected_err(e),
1436 }
1437}
1438
1439/// Maps
1440///
1441/// - VK_ERROR_OUT_OF_HOST_MEMORY
1442/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1443fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1444 // We don't use VK_KHR_buffer_device_address
1445 // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1446 map_host_oom_err(err)
1447}
1448
1449/// Maps
1450///
1451/// - VK_ERROR_OUT_OF_HOST_MEMORY
1452/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1453/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
1454/// - VK_ERROR_INVALID_SHADER_NV
1455fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
1456 // We don't use VK_EXT_pipeline_creation_cache_control
1457 // VK_PIPELINE_COMPILE_REQUIRED_EXT
1458 // We don't use VK_NV_glsl_shader
1459 // VK_ERROR_INVALID_SHADER_NV
1460 map_host_device_oom_err(err)
1461}
1462
1463/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1464/// feature flag is enabled.
1465fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1466 #[cfg(feature = "internal_error_panic")]
1467 panic!("Unexpected Vulkan error: {_err:?}");
1468
1469 #[allow(unreachable_code)]
1470 crate::DeviceError::Unexpected
1471}
1472
1473/// Returns [`crate::DeviceError::OutOfMemory`] or panics if the `oom_panic`
1474/// feature flag is enabled.
1475fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
1476 #[cfg(feature = "oom_panic")]
1477 panic!("Out of memory ({_err:?})");
1478
1479 #[allow(unreachable_code)]
1480 crate::DeviceError::OutOfMemory
1481}
1482
1483/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1484/// feature flag is enabled.
1485fn get_lost_err() -> crate::DeviceError {
1486 #[cfg(feature = "device_lost_panic")]
1487 panic!("Device lost");
1488
1489 #[allow(unreachable_code)]
1490 crate::DeviceError::Lost
1491}
1492
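/// Matches the in-memory layout of `VkAccelerationStructureInstanceKHR`, with
/// each pair of 24/8-bit bitfields packed into a single `u32`.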
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}