wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Number of nanoseconds in one millisecond.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Capacity of the attachment list stored in a `FramebufferKey`.
///
/// NOTE(review): presumably one base and one resolve attachment per color
/// target, plus a single depth/stencil attachment — confirm against the
/// render pass setup code.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// Data-less marker type identifying the Vulkan backend.
///
/// Its [`crate::Api`] implementation names the concrete Vulkan type used for
/// each kind of `wgpu-hal` object.
#[derive(Clone, Debug)]
pub struct Api;
57
/// Binds every associated type of the backend-agnostic [`crate::Api`] trait
/// to the Vulkan type of the same name.
impl crate::Api for Api {
    // Entry points: instance, presentation surface, adapter and device.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Command recording and submission.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding and pipeline objects.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
85
// Invoke `crate::impl_dyn_resource!` for every Vulkan object type listed in
// `impl crate::Api for Api`.
//
// NOTE(review): presumably this implements the type-erased `DynResource`
// plumbing for each type — confirm against the macro definition. The list is
// kept in alphabetical order (with `Adapter` first).
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
110
/// A debug messenger together with the extension function table and the
/// callback user data that belong to it.
struct DebugUtils {
    /// Instance-level `VK_EXT_debug_utils` function table.
    extension: ext::debug_utils::Instance,
    /// Handle of the debug messenger.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    // Never read from Rust: the field exists only to keep the allocation alive.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
123
/// Parameters describing a debug messenger that is yet to be created.
///
/// NOTE(review): presumably consumed during instance creation to build the
/// [`vk::DebugUtilsMessengerEXT`] — confirm in `instance.rs`.
pub struct DebugUtilsCreateInfo {
    /// Message severities of interest.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types of interest.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// Heap-allocated user data for the messenger callback.
    callback_data: Box<DebugUtilsMessengerUserData>,
}
129
#[derive(Debug)]
/// Properties of the validation layer that the debug utils messenger
/// callback needs in order to apply its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
140
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, or `None` if the
    /// validation layer is not present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// their layer, so there's no reason to record the version.
    has_obs_layer: bool,
}
154
/// Instance-level state, shared through [`Arc`] by [`Instance`], [`Surface`],
/// [`Adapter`] and `DeviceShared`.
pub struct InstanceShared {
    /// The `ash` wrapper around the Vulkan instance.
    raw: ash::Instance,
    /// Names of instance extensions.
    ///
    /// NOTE(review): presumably the extensions enabled at creation — confirm.
    extensions: Vec<&'static CStr>,
    /// NOTE(review): presumably `Some` when the instance is owned by external
    /// code and must not be destroyed here — confirm in `instance.rs`.
    drop_guard: Option<crate::DropGuard>,
    /// The instance flags this instance carries.
    flags: wgt::InstanceFlags,
    /// The memory budget thresholds this instance carries.
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if there is one.
    debug_utils: Option<DebugUtils>,
    /// Function table for `VK_KHR_get_physical_device_properties2`, if loaded.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    /// The `ash` entry point.
    entry: ash::Entry,
    /// NOTE(review): presumably records detection of an NVIDIA Optimus setup — confirm.
    has_nv_optimus: bool,
    /// NOTE(review): presumably only meaningful on Android — confirm.
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
174
/// The Vulkan backend's instance type (see `Api::Instance`).
pub struct Instance {
    /// Reference-counted instance state, also held by surfaces, adapters and
    /// devices.
    shared: Arc<InstanceShared>,
}
178
/// The semaphores needed to use one image in a swapchain.
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of leading semaphores in [`present`] that have been handed
    /// out to submissions since this image was last presented.
    ///
    /// It is also the index of the next semaphore in [`present`] to recycle,
    /// and is reset to zero when the present wait list is collected.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}
264
265impl SwapchainImageSemaphores {
266 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
267 Ok(Self {
268 acquire: device.new_binary_semaphore()?,
269 should_wait_for_acquire: true,
270 present: Vec::new(),
271 present_index: 0,
272 previously_used_submission_index: 0,
273 })
274 }
275
276 fn set_used_fence_value(&mut self, value: crate::FenceValue) {
277 self.previously_used_submission_index = value;
278 }
279
280 /// Return the semaphore that commands drawing to this image should wait for, if any.
281 ///
282 /// This only returns `Some` once per acquisition; see
283 /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
284 fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
285 if self.should_wait_for_acquire {
286 self.should_wait_for_acquire = false;
287 Some(self.acquire)
288 } else {
289 None
290 }
291 }
292
293 /// Return a semaphore that a submission that writes to this image should
294 /// signal when it's done.
295 ///
296 /// See [`SwapchainImageSemaphores::present`] for details.
297 fn get_submit_signal_semaphore(
298 &mut self,
299 device: &DeviceShared,
300 ) -> Result<vk::Semaphore, crate::DeviceError> {
301 // Try to recycle a semaphore we created for a previous presentation.
302 let sem = match self.present.get(self.present_index) {
303 Some(sem) => *sem,
304 None => {
305 let sem = device.new_binary_semaphore()?;
306 self.present.push(sem);
307 sem
308 }
309 };
310
311 self.present_index += 1;
312
313 Ok(sem)
314 }
315
316 /// Return the semaphores that a presentation of this image should wait on.
317 ///
318 /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
319 /// ends this image's acquisition should wait for. See
320 /// [`SwapchainImageSemaphores::present`] for details.
321 ///
322 /// Reset `self` to be ready for the next acquisition cycle.
323 ///
324 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
325 fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
326 let old_index = self.present_index;
327
328 // Since this marks the end of this acquire/draw/present cycle, take the
329 // opportunity to reset `self` in preparation for the next acquisition.
330 self.present_index = 0;
331 self.should_wait_for_acquire = true;
332
333 &self.present[0..old_index]
334 }
335
336 unsafe fn destroy(&self, device: &ash::Device) {
337 unsafe {
338 device.destroy_semaphore(self.acquire, None);
339 for sem in &self.present {
340 device.destroy_semaphore(*sem, None);
341 }
342 }
343 }
344}
345
/// A configured swapchain together with its per-image semaphore state.
struct Swapchain {
    /// The swapchain handle.
    raw: vk::SwapchainKHR,
    /// Device-level `VK_KHR_swapchain` function table.
    functor: khr::swapchain::Device,
    /// The device state this swapchain refers to.
    device: Arc<DeviceShared>,
    /// Image handles associated with this swapchain.
    images: Vec<vk::Image>,
    /// The surface configuration stored with this swapchain.
    config: crate::SurfaceConfiguration,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to specify the semaphore as an argument
    /// to the acquire_next_image function which is what tells us which image to use.
    next_semaphore_index: usize,
    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
370
371impl Swapchain {
372 fn advance_surface_semaphores(&mut self) {
373 let semaphore_count = self.surface_semaphores.len();
374 self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
375 }
376
377 fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
378 self.surface_semaphores[self.next_semaphore_index].clone()
379 }
380}
381
/// The Vulkan backend's surface type (see `Api::Surface`).
pub struct Surface {
    /// The surface handle.
    raw: vk::SurfaceKHR,
    /// Instance-level `VK_KHR_surface` function table.
    functor: khr::surface::Instance,
    /// The instance state this surface refers to.
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` while the surface is not configured.
    swapchain: RwLock<Option<Swapchain>>,
}
388
389impl Surface {
390 /// Get the raw Vulkan swapchain associated with this surface.
391 ///
392 /// Returns [`None`] if the surface is not configured.
393 pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
394 let read = self.swapchain.read();
395 read.as_ref().map(|it| it.raw)
396 }
397
398 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
399 /// using [VK_GOOGLE_display_timing].
400 ///
401 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
402 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
403 ///
404 /// This can also be used to add a "not before" timestamp to the presentation.
405 ///
406 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
407 ///
408 /// # Panics
409 ///
410 /// - If the surface hasn't been configured.
411 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
412 ///
413 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
414 #[track_caller]
415 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
416 let mut swapchain = self.swapchain.write();
417 let swapchain = swapchain
418 .as_mut()
419 .expect("Surface should have been configured");
420 let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
421 if swapchain.device.features.contains(features) {
422 swapchain.next_present_time = Some(present_timing);
423 } else {
424 // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
425 panic!(
426 concat!(
427 "Tried to set display timing properties ",
428 "without the corresponding feature ({:?}) enabled."
429 ),
430 features
431 );
432 }
433 }
434}
435
/// A texture obtained from a surface, bundled with the semaphore state used
/// to synchronize submission and presentation of its swapchain image.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// NOTE(review): presumably the index of the image within the swapchain — confirm.
    index: u32,
    /// The texture wrapping the image.
    texture: Texture,
    /// Semaphore state shared with the swapchain (see
    /// `Swapchain::surface_semaphores`).
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}

// Empty impl: no items of `DynSurfaceTexture` need to be provided here.
impl crate::DynSurfaceTexture for SurfaceTexture {}
444
/// Lets a [`SurfaceTexture`] be borrowed as the [`Texture`] it wraps.
impl Borrow<Texture> for SurfaceTexture {
    /// Returns a reference to the wrapped texture.
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}
450
/// Lets a [`SurfaceTexture`] be borrowed as a type-erased
/// [`crate::DynTexture`] trait object.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    /// Returns the wrapped texture, coerced to a trait object.
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
456
/// The Vulkan backend's adapter type (see `Api::Adapter`): a physical device
/// plus the capability information stored alongside it.
pub struct Adapter {
    /// The physical device handle.
    raw: vk::PhysicalDevice,
    /// The instance state this adapter refers to.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// NOTE(review): presumably the memory property flags this backend knows
    /// how to handle — confirm in `adapter.rs`.
    known_memory_flags: vk::MemoryPropertyFlags,
    /// Properties of the physical device.
    phd_capabilities: adapter::PhysicalDeviceProperties,
    /// Features of the physical device.
    phd_features: PhysicalDeviceFeatures,
    /// Downlevel flags stored for this adapter.
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds stored for this adapter; see [`Workarounds`].
    workarounds: Workarounds,
}
468
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Where the functions of an extension should be looked up: in a loaded
/// extension function table, or in core Vulkan.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
476
/// Device-level extension function tables.
///
/// NOTE(review): each entry is presumably `None` when the corresponding
/// extension is not enabled on the device — confirm at the construction site.
struct DeviceExtensionFunctions {
    /// `VK_EXT_debug_utils` device functions.
    debug_utils: Option<ext::debug_utils::Device>,
    /// `VK_KHR_draw_indirect_count` device functions.
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// `VK_KHR_timeline_semaphore` functions, either loaded from the
    /// extension or promoted to core (see [`ExtensionFn`]).
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    /// Function tables used for ray tracing.
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    /// `VK_EXT_mesh_shader` device functions.
    mesh_shading: Option<ext::mesh_shader::Device>,
}
484
/// The device-level function tables grouped under
/// `DeviceExtensionFunctions::ray_tracing`.
struct RayTracingDeviceExtensionFunctions {
    /// `VK_KHR_acceleration_structure` device functions.
    acceleration_structure: khr::acceleration_structure::Device,
    /// `VK_KHR_buffer_device_address` device functions.
    buffer_device_address: khr::buffer_device_address::Device,
}
489
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// NOTE(review): presumably whether a usage can be specified per image
    /// view — confirm in `adapter.rs`.
    image_view_usage: bool,
    /// Whether timeline semaphores are available. The module documentation
    /// states that, when they are, they are used 1:1 with wgpu-hal fences.
    timeline_semaphores: bool,
    /// NOTE(review): `texture_d24`, `texture_d24_s8` and `texture_s8`
    /// presumably record support for the corresponding depth/stencil
    /// formats — confirm in `adapter.rs`.
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    /// NOTE(review): presumably an alignment mask applied to mapped ranges of
    /// non-coherent memory — confirm where it is computed.
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// NOTE(review): presumably the image counterpart of
    /// `robust_buffer_access` — confirm in `adapter.rs`.
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// NOTE(review): presumably the image counterpart of
    /// `robust_buffer_access2` — confirm in `adapter.rs`.
    robust_image_access2: bool,
    /// NOTE(review): meaning not visible in this file — confirm in `adapter.rs`.
    zero_initialize_workgroup_memory: bool,
    /// NOTE(review): presumably support for image format lists — confirm in `adapter.rs`.
    image_format_list: bool,
    /// NOTE(review): presumably a limit on the number of sampler objects — confirm.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
560
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// Each flag selects one driver-specific workaround, described on the
    /// flag itself.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
594
/// Hashable description of a single attachment; a building block of
/// [`RenderPassKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    /// Format of the attachment.
    format: vk::Format,
    /// Image layout of the attachment.
    layout: vk::ImageLayout,
    /// Attachment operations.
    ops: crate::AttachmentOps,
}
601
impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    ///
    /// The key uses the given `format` and `layout`, and has every
    /// [`crate::AttachmentOps`] flag set.
    ///
    /// NOTE(review): presumably the concrete ops are irrelevant where this key
    /// is used, so a fixed value is chosen — confirm at the call sites.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}
612
/// Hashable description of one color attachment and its optional resolve
/// attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    /// The color attachment itself.
    base: AttachmentKey,
    /// The resolve attachment, if there is one.
    resolve: Option<AttachmentKey>,
}
618
/// Hashable description of a depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    /// Format, layout and operations of the attachment.
    ///
    /// NOTE(review): `base.ops` presumably describes the depth aspect, with
    /// `stencil_ops` covering the stencil aspect — confirm.
    base: AttachmentKey,
    /// Operations for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
624
/// Key of the render pass cache stored in `DeviceShared::render_passes`.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// Color attachment slots; a slot may be `None`.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    /// The depth/stencil attachment, if there is one.
    depth_stencil: Option<DepthStencilAttachmentKey>,
    /// Sample count of the render pass.
    sample_count: u32,
    /// Multiview setting of the render pass, if any.
    multiview: Option<NonZeroU32>,
}
632
/// Device-level state, shared through [`Arc`] by [`Device`], [`Queue`],
/// `Swapchain` and `CommandEncoder`.
struct DeviceShared {
    /// The `ash` wrapper around the Vulkan device.
    raw: ash::Device,
    /// NOTE(review): `family_index`, `queue_index` and `raw_queue` presumably
    /// identify the single queue this backend uses — confirm.
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// When this is `Some`, dropping `DeviceShared` does not destroy the
    /// Vulkan device (see the `Drop` impl).
    drop_guard: Option<crate::DropGuard>,
    /// The instance state this device refers to.
    instance: Arc<InstanceShared>,
    /// The physical device handle.
    physical_device: vk::PhysicalDevice,
    /// Names of device extensions.
    ///
    /// NOTE(review): presumably those enabled at device creation — confirm.
    enabled_extensions: Vec<&'static CStr>,
    /// Loaded extension function tables.
    extension_fns: DeviceExtensionFunctions,
    /// NOTE(review): presumably the PCI vendor id of the physical device — confirm.
    vendor_id: u32,
    /// NOTE(review): presumably used to validate pipeline cache data — confirm.
    pipeline_cache_validation_key: [u8; 16],
    /// NOTE(review): presumably the timestamp period reported by the physical device — confirm.
    timestamp_period: f32,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds stored for this device; see [`Workarounds`].
    workarounds: Workarounds,
    /// The feature set stored for this device; `Surface::set_next_present_time`
    /// checks it before accepting present timing information.
    features: wgt::Features,
    /// Render pass cache. Its entries are destroyed when `DeviceShared` is
    /// dropped.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    /// Sampler cache.
    sampler_cache: Mutex<sampler::SamplerCache>,
    /// NOTE(review): presumably counts live device memory allocations — confirm.
    memory_allocations_counter: InternalCounter,
}
653
654impl Drop for DeviceShared {
655 fn drop(&mut self) {
656 for &raw in self.render_passes.lock().values() {
657 unsafe { self.raw.destroy_render_pass(raw, None) };
658 }
659 if self.drop_guard.is_none() {
660 unsafe { self.raw.destroy_device(None) };
661 }
662 }
663}
664
/// The Vulkan backend's device type (see `Api::Device`).
pub struct Device {
    /// Device state shared with queues, swapchains and command encoders.
    shared: Arc<DeviceShared>,
    /// Allocator for device memory; cleaned up when the `Device` is dropped.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor sets; cleaned up when the `Device` is dropped.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// NOTE(review): presumably a bit mask of the memory type indices this
    /// backend may allocate from — confirm.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend.
    naga_options: naga::back::spv::Options<'static>,
    /// RenderDoc integration state; only present with the `renderdoc` feature.
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    /// Shared HAL counters.
    counters: Arc<wgt::HalCounters>,
}
676
677impl Drop for Device {
678 fn drop(&mut self) {
679 unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
680 unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
681 }
682}
683
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` copies the handles only; `advance` uses it to hand out a snapshot
// of the current pair.
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
717
718impl RelaySemaphores {
719 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
720 Ok(Self {
721 wait: None,
722 signal: device.new_binary_semaphore()?,
723 })
724 }
725
726 /// Advances the semaphores, returning the semaphores that should be used for a submission.
727 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
728 let old = self.clone();
729
730 // Build the state for the next submission.
731 match self.wait {
732 None => {
733 // The `old` values describe the first submission to this queue.
734 // The second submission should wait on `old.signal`, and then
735 // signal a new semaphore which we'll create now.
736 self.wait = Some(old.signal);
737 self.signal = device.new_binary_semaphore()?;
738 }
739 Some(ref mut wait) => {
740 // What this submission signals, the next should wait.
741 mem::swap(wait, &mut self.signal);
742 }
743 };
744
745 Ok(old)
746 }
747
748 /// Destroys the semaphores.
749 unsafe fn destroy(&self, device: &ash::Device) {
750 unsafe {
751 if let Some(wait) = self.wait {
752 device.destroy_semaphore(wait, None);
753 }
754 device.destroy_semaphore(self.signal, None);
755 }
756 }
757}
758
/// The Vulkan backend's queue type (see `Api::Queue`).
pub struct Queue {
    /// The queue handle.
    raw: vk::Queue,
    /// Device-level `VK_KHR_swapchain` function table.
    swapchain_fn: khr::swapchain::Device,
    /// The device state this queue refers to.
    device: Arc<DeviceShared>,
    /// NOTE(review): presumably the index of the queue family `raw` belongs to — confirm.
    family_index: u32,
    /// Semaphores that keep submissions ordered; see [`RelaySemaphores`].
    /// Destroyed when the queue is dropped.
    relay_semaphores: Mutex<RelaySemaphores>,
    /// NOTE(review): presumably extra semaphores for a submission to signal —
    /// confirm in `semaphore_list.rs` and at the use sites.
    signal_semaphores: Mutex<SemaphoreList>,
}
767
impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
773
impl Drop for Queue {
    /// Destroys the relay semaphores on the device this queue refers to.
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
/// The device memory range backing a [`Buffer`].
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A block obtained from the `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A range of a raw `vk::DeviceMemory` object, as passed to
    /// `Buffer::from_raw_managed`.
    VulkanMemory {
        /// The memory object.
        memory: vk::DeviceMemory,
        /// Offset of the range within `memory`.
        offset: u64,
        /// Size of the range.
        size: u64,
    },
}
788impl BufferMemoryBacking {
789 fn memory(&self) -> &vk::DeviceMemory {
790 match self {
791 Self::Managed(m) => m.memory(),
792 Self::VulkanMemory { memory, .. } => memory,
793 }
794 }
795 fn offset(&self) -> u64 {
796 match self {
797 Self::Managed(m) => m.offset(),
798 Self::VulkanMemory { offset, .. } => *offset,
799 }
800 }
801 fn size(&self) -> u64 {
802 match self {
803 Self::Managed(m) => m.size(),
804 Self::VulkanMemory { size, .. } => *size,
805 }
806 }
807}
/// The Vulkan backend's buffer type (see `Api::Buffer`).
#[derive(Debug)]
pub struct Buffer {
    /// The buffer handle.
    raw: vk::Buffer,
    /// The memory backing the buffer, or `None` for buffers created through
    /// `Buffer::from_raw`, whose memory is managed by the caller.
    block: Option<Mutex<BufferMemoryBacking>>,
}
813impl Buffer {
814 /// # Safety
815 ///
816 /// - `vk_buffer`'s memory must be managed by the caller
817 /// - Externally imported buffers can't be mapped by `wgpu`
818 pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
819 Self {
820 raw: vk_buffer,
821 block: None,
822 }
823 }
824 /// # Safety
825 /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
826 /// - Externally imported buffers can't be mapped by `wgpu`
827 /// - `offset` and `size` must be valid with the allocation of `memory`
828 pub unsafe fn from_raw_managed(
829 vk_buffer: vk::Buffer,
830 memory: vk::DeviceMemory,
831 offset: u64,
832 size: u64,
833 ) -> Self {
834 Self {
835 raw: vk_buffer,
836 block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
837 memory,
838 offset,
839 size,
840 })),
841 }
842 }
843}
844
// Empty impl: no items of `DynBuffer` need to be provided here.
impl crate::DynBuffer for Buffer {}
846
/// The Vulkan backend's acceleration structure type (see
/// `Api::AccelerationStructure`).
#[derive(Debug)]
pub struct AccelerationStructure {
    /// The acceleration structure handle.
    raw: vk::AccelerationStructureKHR,
    /// NOTE(review): presumably the buffer that stores the acceleration
    /// structure — confirm in `device.rs`.
    buffer: vk::Buffer,
    /// Memory block obtained from the `gpu_alloc` allocator.
    ///
    /// NOTE(review): presumably the memory bound to `buffer` — confirm.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// NOTE(review): presumably a query pool used to read back the compacted
    /// size, when compaction is requested — confirm.
    compacted_size_query: Option<vk::QueryPool>,
}

// Empty impl: no items of `DynAccelerationStructure` need to be provided here.
impl crate::DynAccelerationStructure for AccelerationStructure {}
856
/// The Vulkan backend's texture type (see `Api::Texture`).
#[derive(Debug)]
pub struct Texture {
    /// The image handle.
    raw: vk::Image,
    /// NOTE(review): presumably `Some` when the image is owned by external
    /// code — confirm where textures are created and destroyed.
    drop_guard: Option<crate::DropGuard>,
    /// NOTE(review): presumably memory imported from outside the allocator — confirm.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory block obtained from the `gpu_alloc` allocator, if any.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// The texture format.
    format: wgt::TextureFormat,
    /// NOTE(review): presumably the texture extent used to bound copies — confirm.
    copy_size: crate::CopyExtent,
}

// Empty impl: no items of `DynTexture` need to be provided here.
impl crate::DynTexture for Texture {}
868
impl Texture {
    /// Returns the raw Vulkan image handle of this texture.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
877
/// The Vulkan backend's texture view type (see `Api::TextureView`).
#[derive(Debug)]
pub struct TextureView {
    /// Handle of the image the view refers to.
    raw_texture: vk::Image,
    /// The image view handle.
    raw: vk::ImageView,
    /// Number of array layers; never zero.
    layers: NonZeroU32,
    /// The view format as a `wgpu` format.
    format: wgt::TextureFormat,
    /// The view format as a Vulkan format.
    raw_format: vk::Format,
    /// NOTE(review): presumably the first mip level covered by the view — confirm.
    base_mip_level: u32,
    /// The dimensionality of the view.
    dimension: wgt::TextureViewDimension,
}

// Empty impl: no items of `DynTextureView` need to be provided here.
impl crate::DynTextureView for TextureView {}
890
impl TextureView {
    /// Returns the raw Vulkan image view handle of this view.
    ///
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}
899
/// The Vulkan backend's sampler type (see `Api::Sampler`).
#[derive(Debug)]
pub struct Sampler {
    /// The sampler handle.
    raw: vk::Sampler,
    /// The creation parameters stored with the sampler.
    ///
    /// NOTE(review): presumably used to look the sampler up in
    /// `DeviceShared::sampler_cache` — confirm in `sampler.rs`.
    create_info: vk::SamplerCreateInfo<'static>,
}

// Empty impl: no items of `DynSampler` need to be provided here.
impl crate::DynSampler for Sampler {}
907
/// The Vulkan backend's bind group layout type (see `Api::BindGroupLayout`).
#[derive(Debug)]
pub struct BindGroupLayout {
    /// The descriptor set layout handle.
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts, in the form used by `gpu_descriptor`.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Pairs of a descriptor type and a `u32`.
    ///
    /// NOTE(review): the `u32` is presumably the descriptor count of the
    /// binding — confirm in `device.rs`.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

// Empty impl: no items of `DynBindGroupLayout` need to be provided here.
impl crate::DynBindGroupLayout for BindGroupLayout {}
918
/// Vulkan backend representation of a pipeline layout.
#[derive(Debug)]
pub struct PipelineLayout {
    /// The Vulkan pipeline layout handle.
    raw: vk::PipelineLayout,
    /// Binding map in the form defined by Naga's SPIR-V backend.
    binding_arrays: naga::back::spv::BindingMap,
}

// Empty impl: only opts `PipelineLayout` into `crate::DynPipelineLayout`.
impl crate::DynPipelineLayout for PipelineLayout {}
926
/// Vulkan backend representation of a bind group.
#[derive(Debug)]
pub struct BindGroup {
    /// The descriptor set, wrapped in `gpu_descriptor`'s set type.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

// Empty impl: only opts `BindGroup` into `crate::DynBindGroup`.
impl crate::DynBindGroup for BindGroup {}
933
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
///
/// Every field is a scratch `Vec` that `Temp::clear` empties for reuse.
#[derive(Default)]
struct Temp {
    /// Byte buffer that `Temp::make_c_str` fills with a NUL-terminated copy of a name.
    marker: Vec<u8>,
    /// Scratch list of buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Scratch list of image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
941
942impl Temp {
943 fn clear(&mut self) {
944 self.marker.clear();
945 self.buffer_barriers.clear();
946 self.image_barriers.clear();
947 }
948
949 fn make_c_str(&mut self, name: &str) -> &CStr {
950 self.marker.clear();
951 self.marker.extend_from_slice(name.as_bytes());
952 self.marker.push(0);
953 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
954 }
955}
956
/// Hashable key under which `CommandEncoder::framebuffers` stores `vk::Framebuffer`s.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass handle.
    raw_pass: vk::RenderPass,
    /// Image views of the attachments, at most `MAX_TOTAL_ATTACHMENTS` of them.
    attachments: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    /// The extent that is part of the key.
    extent: wgt::Extent3d,
}
963
/// Hashable key under which `CommandEncoder::temp_texture_views` stores `vk::ImageView`s.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// The image handle.
    texture: vk::Image,
    /// The Vulkan format.
    format: vk::Format,
    /// The mip level.
    mip_level: u32,
    /// The depth slice.
    depth_slice: u32,
}
971
/// Vulkan backend command encoder.
///
/// Holds a command pool plus the per-encoder caches that its `Drop` impl destroys.
pub struct CommandEncoder {
    /// The command pool; destroyed when the encoder is dropped.
    raw: vk::CommandPool,
    /// Shared device state, used for every Vulkan call made by the encoder.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers keyed by `FramebufferKey`; destroyed when the encoder is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Image views keyed by `TempTextureViewKey`; destroyed when the encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, vk::ImageView>,

    /// Shared counters; `command_encoders` is decremented when the encoder is dropped.
    counters: Arc<wgt::HalCounters>,
}
1013
1014impl Drop for CommandEncoder {
1015 fn drop(&mut self) {
1016 // SAFETY:
1017 //
1018 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1019 // `CommandBuffer` must live until its execution is complete, and that a
1020 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1021 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1022 // state.
1023 //
1024 // The other VUIDs are pretty obvious.
1025 unsafe {
1026 // `vkDestroyCommandPool` also frees any command buffers allocated
1027 // from that pool, so there's no need to explicitly call
1028 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1029 // fields.
1030 self.device.raw.destroy_command_pool(self.raw, None);
1031 }
1032
1033 for (_, fb) in self.framebuffers.drain() {
1034 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1035 }
1036
1037 for (_, view) in self.temp_texture_views.drain() {
1038 unsafe { self.device.raw.destroy_image_view(view, None) };
1039 }
1040
1041 self.counters.command_encoders.sub(1);
1042 }
1043}
1044
impl CommandEncoder {
    /// Returns the `active` command buffer handle.
    ///
    /// Per the `active` field's documentation this is only non-null while the
    /// encoder is recording.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1053
1054impl fmt::Debug for CommandEncoder {
1055 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1056 f.debug_struct("CommandEncoder")
1057 .field("raw", &self.raw)
1058 .finish()
1059 }
1060}
1061
/// Vulkan backend representation of a finished command buffer.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The Vulkan command buffer handle; this is what `Queue::submit` hands to Vulkan.
    raw: vk::CommandBuffer,
}

// Empty impl: only opts `CommandBuffer` into `crate::DynCommandBuffer`.
impl crate::DynCommandBuffer for CommandBuffer {}
1068
/// Vulkan backend representation of a shader module.
///
/// The two variants differ a lot in size, hence the `large_enum_variant` allowance.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An existing Vulkan shader module handle.
    Raw(vk::ShaderModule),
    /// A Naga shader kept together with its runtime-check settings
    /// (presumably translated later, at pipeline creation; TODO confirm).
    Intermediate {
        /// The Naga shader.
        naga_shader: crate::NagaShader,
        /// Runtime checks requested for this shader.
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

// Empty impl: only opts `ShaderModule` into `crate::DynShaderModule`.
impl crate::DynShaderModule for ShaderModule {}
1080
/// Vulkan backend representation of a render pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The Vulkan pipeline handle.
    raw: vk::Pipeline,
}

// Empty impl: only opts `RenderPipeline` into `crate::DynRenderPipeline`.
impl crate::DynRenderPipeline for RenderPipeline {}
1087
/// Vulkan backend representation of a compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The Vulkan pipeline handle.
    raw: vk::Pipeline,
}

// Empty impl: only opts `ComputePipeline` into `crate::DynComputePipeline`.
impl crate::DynComputePipeline for ComputePipeline {}
1094
/// Vulkan backend representation of a pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    /// The Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

// Empty impl: only opts `PipelineCache` into `crate::DynPipelineCache`.
impl crate::DynPipelineCache for PipelineCache {}
1101
/// Vulkan backend representation of a query set.
#[derive(Debug)]
pub struct QuerySet {
    /// The Vulkan query pool handle.
    raw: vk::QueryPool,
}

// Empty impl: only opts `QuerySet` into `crate::DynQuerySet`.
impl crate::DynQuerySet for QuerySet {}
1108
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signaled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest value already known to be complete.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Fences available for reuse by a later submission.
        free: Vec<vk::Fence>,
    },
}

// Empty impl: only opts `Fence` into `crate::DynFence`.
impl crate::DynFence for Fence {}
1159
1160impl Fence {
1161 /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1162 ///
1163 /// As an optimization, assume that we already know that the fence has
1164 /// reached `last_completed`, and don't bother checking fences whose values
1165 /// are less than that: those fences remain in the `active` array only
1166 /// because we haven't called `maintain` yet to clean them up.
1167 ///
1168 /// [`FenceValue`]: crate::FenceValue
1169 fn check_active(
1170 device: &ash::Device,
1171 mut last_completed: crate::FenceValue,
1172 active: &[(crate::FenceValue, vk::Fence)],
1173 ) -> Result<crate::FenceValue, crate::DeviceError> {
1174 for &(value, raw) in active.iter() {
1175 unsafe {
1176 if value > last_completed
1177 && device
1178 .get_fence_status(raw)
1179 .map_err(map_host_device_oom_and_lost_err)?
1180 {
1181 last_completed = value;
1182 }
1183 }
1184 }
1185 Ok(last_completed)
1186 }
1187
1188 /// Return the highest signalled [`FenceValue`] for `self`.
1189 ///
1190 /// [`FenceValue`]: crate::FenceValue
1191 fn get_latest(
1192 &self,
1193 device: &ash::Device,
1194 extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1195 ) -> Result<crate::FenceValue, crate::DeviceError> {
1196 match *self {
1197 Self::TimelineSemaphore(raw) => unsafe {
1198 Ok(match *extension.unwrap() {
1199 ExtensionFn::Extension(ref ext) => ext
1200 .get_semaphore_counter_value(raw)
1201 .map_err(map_host_device_oom_and_lost_err)?,
1202 ExtensionFn::Promoted => device
1203 .get_semaphore_counter_value(raw)
1204 .map_err(map_host_device_oom_and_lost_err)?,
1205 })
1206 },
1207 Self::FencePool {
1208 last_completed,
1209 ref active,
1210 free: _,
1211 } => Self::check_active(device, last_completed, active),
1212 }
1213 }
1214
1215 /// Trim the internal state of this [`Fence`].
1216 ///
1217 /// This function has no externally visible effect, but you should call it
1218 /// periodically to keep this fence's resource consumption under control.
1219 ///
1220 /// For fences using the [`FencePool`] implementation, this function
1221 /// recycles fences that have been signaled. If you don't call this,
1222 /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1223 /// time it's called.
1224 ///
1225 /// [`FencePool`]: Fence::FencePool
1226 /// [`Queue::submit`]: crate::Queue::submit
1227 fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1228 match *self {
1229 Self::TimelineSemaphore(_) => {}
1230 Self::FencePool {
1231 ref mut last_completed,
1232 ref mut active,
1233 ref mut free,
1234 } => {
1235 let latest = Self::check_active(device, *last_completed, active)?;
1236 let base_free = free.len();
1237 for &(value, raw) in active.iter() {
1238 if value <= latest {
1239 free.push(raw);
1240 }
1241 }
1242 if free.len() != base_free {
1243 active.retain(|&(value, _)| value > latest);
1244 unsafe { device.reset_fences(&free[base_free..]) }
1245 .map_err(map_device_oom_err)?
1246 }
1247 *last_completed = latest;
1248 }
1249 }
1250 Ok(())
1251 }
1252}
1253
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` with a single `vkQueueSubmit` and arranges for
    /// `signal_fence` to reach `signal_value` once the submission completes.
    ///
    /// Besides the command buffers, the submission carries:
    /// - a wait on the acquire semaphore, and a signal of the submit semaphore, of
    ///   every entry in `surface_textures`;
    /// - every semaphore queued through `Queue::add_signal_semaphore`;
    /// - the relay semaphores that order this submission after the previous one;
    /// - either the fence's timeline semaphore or a pooled `vk::Fence`.
    ///
    /// # Panics
    ///
    /// Panics if a surface texture's semaphore lock cannot be taken immediately.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless `signal_fence` is a `FencePool`.
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        // NOTE(review): the locks below are taken with `try_lock().expect(..)`, so a
        // duplicate appears to panic rather than block; confirm the message wording.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Take all surface semaphore locks up front.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Take over the semaphores queued via `add_signal_semaphore`. `guard` is not
        // dropped explicitly, so the lock is held until this function returns.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        // `maintain` runs first so already-signaled pool fences can be reused below.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if there is one, otherwise create a new one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // Storage handed to `add_to_submit`; presumably filled in only when timeline
        // semaphores are present. See `SemaphoreList::add_to_submit`.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` on the swapchain of `surface` with `vkQueuePresentKHR`.
    ///
    /// The present waits on the semaphores returned by the texture's
    /// `get_present_wait_semaphores`. If the swapchain has a pending
    /// `next_present_time`, it is taken and chained into the present info as a
    /// `vk::PresentTimesInfoGOOGLE`.
    ///
    /// # Errors
    ///
    /// `ERROR_OUT_OF_DATE_KHR` maps to `SurfaceError::Outdated` and
    /// `ERROR_SURFACE_LOST_KHR` to `SurfaceError::Lost`; every other error goes
    /// through `map_host_device_oom_and_lost_err`.
    ///
    /// # Panics
    ///
    /// Panics if `surface` currently has no swapchain.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        // Declared outside the `if` so they outlive the `vk_info` that borrows them.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    /// Returns the `timestamp_period` stored on the shared device state.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
1433
1434impl Queue {
1435 pub fn raw_device(&self) -> &ash::Device {
1436 &self.device.raw
1437 }
1438
1439 pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1440 let mut guard = self.signal_semaphores.lock();
1441 if let Some(value) = semaphore_value {
1442 guard.push_timeline(semaphore, value);
1443 } else {
1444 guard.push_binary(semaphore);
1445 }
1446 }
1447}
1448
1449/// Maps
1450///
1451/// - VK_ERROR_OUT_OF_HOST_MEMORY
1452/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1453fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1454 match err {
1455 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1456 get_oom_err(err)
1457 }
1458 e => get_unexpected_err(e),
1459 }
1460}
1461
1462/// Maps
1463///
1464/// - VK_ERROR_OUT_OF_HOST_MEMORY
1465/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1466/// - VK_ERROR_DEVICE_LOST
1467fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1468 match err {
1469 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1470 other => map_host_device_oom_err(other),
1471 }
1472}
1473
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last code gets no dedicated handling (see the comment in the body), so
/// this simply delegates to `map_host_device_oom_err`.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1484
1485/// Maps
1486///
1487/// - VK_ERROR_OUT_OF_HOST_MEMORY
1488fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1489 match err {
1490 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1491 e => get_unexpected_err(e),
1492 }
1493}
1494
1495/// Maps
1496///
1497/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1498fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1499 match err {
1500 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1501 e => get_unexpected_err(e),
1502 }
1503}
1504
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last code gets no dedicated handling (see the comment in the body), so
/// this simply delegates to `map_host_oom_err`.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1514
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// The last two codes get no dedicated handling (see the comments in the body),
/// so this simply delegates to `map_host_device_oom_err`.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1528
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
///
/// `_err` is only used in the panic message, hence the leading underscore.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled, the panic above makes this value unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1538
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The Vulkan result is accepted but not inspected.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1543
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled, the panic above makes this value unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1553
/// Plain-old-data record describing one TLAS instance, with a fixed C layout.
///
/// The fields add up to 64 bytes (48 + 4 + 4 + 8). Presumably this mirrors
/// Vulkan's `VkAccelerationStructureInstanceKHR`. TODO confirm.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Twelve floats of transform data (presumably a 3x4 matrix; confirm the ordering).
    transform: [f32; 12],
    /// Custom data and mask packed into one `u32`.
    custom_data_and_mask: u32,
    /// Shader binding table record offset and flags packed into one `u32`.
    shader_binding_table_record_offset_and_flags: u32,
    /// Reference to the acceleration structure, as a `u64`.
    acceleration_structure_reference: u64,
}
1562
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to those members will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}
1584
/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1594
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}
1614
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;