wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement through a combination of the following approaches:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
15Framebuffers are also cached on the device, but they are removed when
16any of the image views (they have) gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Upper bound on attachments referenced by a single render pass:
/// every color attachment may carry a resolve attachment (`* 2`),
/// plus one depth/stencil attachment (`+ 1`).
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
53
/// Marker type selecting the Vulkan backend; implements [`crate::Api`].
#[derive(Clone, Debug)]
pub struct Api;
56
impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    // Instance-level objects.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Queue and command recording.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding model and pipelines.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
86
// Hook every backend type into the crate's type-erased (`dyn`) resource
// machinery; the macro is defined in the crate root.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
111
/// Instance-level `VK_EXT_debug_utils` state: the loaded extension
/// function table plus the messenger registered through it.
struct DebugUtils {
    /// Loaded instance-level function table for `VK_EXT_debug_utils`.
    extension: ext::debug_utils::Instance,
    /// The messenger that routes debug messages to our callback.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
124
/// Parameters for creating the debug-utils messenger.
pub struct DebugUtilsCreateInfo {
    /// Message severities the messenger should report.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message categories the messenger should report.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback; see [`DebugUtilsMessengerUserData`].
    callback_data: Box<DebugUtilsMessengerUserData>,
}
130
#[derive(Debug)]
/// The properties of the validation layer that the debug-utils
/// messenger callback needs for its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
141
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
155
/// Instance state shared between the [`Instance`] and everything
/// created from it.
pub struct InstanceShared {
    /// The raw `ash` instance handle.
    raw: ash::Instance,
    /// Names of the instance extensions enabled at creation.
    extensions: Vec<&'static CStr>,
    // NOTE(review): presumably `Some` when the raw instance is owned
    // externally, mirroring `DeviceShared::drop_guard` — confirm.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug-utils messenger state, if the extension was enabled.
    debug_utils: Option<DebugUtils>,
    /// Loaded `VK_KHR_get_physical_device_properties2` functions, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    // Whether the NVIDIA Optimus arrangement was detected — used for
    // workarounds elsewhere; TODO confirm exact use against adapter.rs.
    has_nv_optimus: bool,
    /// Android SDK level this instance was created under (0 on other platforms,
    /// presumably — verify against instance.rs).
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
175
/// The Vulkan backend's `Instance` type: a shared handle to the
/// instance-level state in [`InstanceShared`].
pub struct Instance {
    shared: Arc<InstanceShared>,
}
179
/// Semaphore used to acquire a swapchain image.
///
/// One of these exists per slot in [`Swapchain::acquire_semaphores`];
/// slots are used round-robin, one per acquired frame.
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
228
229impl SwapchainAcquireSemaphore {
230 fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
231 Ok(Self {
232 acquire: device
233 .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
234 should_wait_for_acquire: true,
235 previously_used_submission_index: 0,
236 })
237 }
238
239 /// Sets the fence value which the next acquire will wait for. This prevents
240 /// the semaphore from being used while the previous submission is still in flight.
241 fn set_used_fence_value(&mut self, value: crate::FenceValue) {
242 self.previously_used_submission_index = value;
243 }
244
245 /// Return the semaphore that commands drawing to this image should wait for, if any.
246 ///
247 /// This only returns `Some` once per acquisition; see
248 /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
249 fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
250 if self.should_wait_for_acquire {
251 self.should_wait_for_acquire = false;
252 Some(self.acquire)
253 } else {
254 None
255 }
256 }
257
258 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
259 /// so reset internal state to be ready for the next frame.
260 fn end_semaphore_usage(&mut self) {
261 // Reset the acquire semaphore, so that the next time we acquire this
262 // image, we can wait for it again.
263 self.should_wait_for_acquire = true;
264 }
265
266 unsafe fn destroy(&self, device: &ash::Device) {
267 unsafe {
268 device.destroy_semaphore(self.acquire, None);
269 }
270 }
271}
272
/// Pool of semaphores ordering presentation of one swapchain image
/// after the submissions that drew to it.
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    frame_index: usize,
}
319
320impl SwapchainPresentSemaphores {
321 pub fn new(frame_index: usize) -> Self {
322 Self {
323 present: Vec::new(),
324 present_index: 0,
325 frame_index,
326 }
327 }
328
329 /// Return the semaphore that the next submission that writes to this image should
330 /// signal when it's done.
331 ///
332 /// See [`SwapchainPresentSemaphores::present`] for details.
333 fn get_submit_signal_semaphore(
334 &mut self,
335 device: &DeviceShared,
336 ) -> Result<vk::Semaphore, crate::DeviceError> {
337 // Try to recycle a semaphore we created for a previous presentation.
338 let sem = match self.present.get(self.present_index) {
339 Some(sem) => *sem,
340 None => {
341 let sem = device.new_binary_semaphore(&format!(
342 "SwapchainImageSemaphore: Image {} present semaphore {}",
343 self.frame_index, self.present_index
344 ))?;
345 self.present.push(sem);
346 sem
347 }
348 };
349
350 self.present_index += 1;
351
352 Ok(sem)
353 }
354
355 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
356 /// so reset internal state to be ready for the next frame.
357 fn end_semaphore_usage(&mut self) {
358 // Reset the index to 0, so that the next time we get a semaphore, we
359 // start from the beginning of the list.
360 self.present_index = 0;
361 }
362
363 /// Return the semaphores that a presentation of this image should wait on.
364 ///
365 /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
366 /// ends this image's acquisition should wait for. See
367 /// [`SwapchainPresentSemaphores::present`] for details.
368 ///
369 /// Reset `self` to be ready for the next acquisition cycle.
370 ///
371 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
372 fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
373 self.present[0..self.present_index].to_vec()
374 }
375
376 unsafe fn destroy(&self, device: &ash::Device) {
377 unsafe {
378 for sem in &self.present {
379 device.destroy_semaphore(*sem, None);
380 }
381 }
382 }
383}
384
/// A configured swapchain plus the synchronization state wgpu-hal
/// layers on top of it.
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Loaded `VK_KHR_swapchain` device-level function table.
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    /// The swapchain's images, indexed by the index returned from acquire.
    images: Vec<vk::Image>,
    /// Fence used to wait on the acquired image.
    fence: vk::Fence,
    /// The configuration this swapchain was created with.
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
439
440impl Swapchain {
441 /// Mark the current frame finished, advancing to the next acquire semaphore.
442 fn advance_acquire_semaphore(&mut self) {
443 let semaphore_count = self.acquire_semaphores.len();
444 self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
445 }
446
447 /// Get the next acquire semaphore that should be used with this swapchain.
448 fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
449 self.acquire_semaphores[self.next_acquire_index].clone()
450 }
451
452 /// Get the set of present semaphores that should be used with the given image index.
453 fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
454 self.present_semaphores[index as usize].clone()
455 }
456}
457
/// A presentable surface plus its (optional) configured swapchain.
pub struct Surface {
    /// The raw surface handle.
    raw: vk::SurfaceKHR,
    /// Loaded `VK_KHR_surface` instance-level function table.
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    /// `None` until the surface is configured.
    swapchain: RwLock<Option<Swapchain>>,
}
464
465impl Surface {
466 pub unsafe fn raw_handle(&self) -> vk::SurfaceKHR {
467 self.raw
468 }
469
470 /// Get the raw Vulkan swapchain associated with this surface.
471 ///
472 /// Returns [`None`] if the surface is not configured.
473 pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
474 let read = self.swapchain.read();
475 read.as_ref().map(|it| it.raw)
476 }
477
478 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
479 /// using [VK_GOOGLE_display_timing].
480 ///
481 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
482 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
483 ///
484 /// This can also be used to add a "not before" timestamp to the presentation.
485 ///
486 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
487 ///
488 /// # Panics
489 ///
490 /// - If the surface hasn't been configured.
491 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
492 ///
493 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
494 #[track_caller]
495 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
496 let mut swapchain = self.swapchain.write();
497 let swapchain = swapchain
498 .as_mut()
499 .expect("Surface should have been configured");
500 let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
501 if swapchain.device.features.contains(features) {
502 swapchain.next_present_time = Some(present_timing);
503 } else {
504 // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
505 panic!(
506 concat!(
507 "Tried to set display timing properties ",
508 "without the corresponding feature ({:?}) enabled."
509 ),
510 features
511 );
512 }
513 }
514}
515
/// A swapchain image handed out by `acquire_texture`, bundled with the
/// synchronization state needed to submit to and present it.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// Index of this image within the swapchain, as returned by acquire.
    index: u32,
    texture: Texture,
    /// Acquire-side synchronization for this frame; see [`SwapchainAcquireSemaphore`].
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Present-side semaphore pool for this image; see [`SwapchainPresentSemaphores`].
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
523
// Allow `SurfaceTexture` to travel through the type-erased HAL interface.
impl crate::DynSurfaceTexture for SurfaceTexture {}
525
impl Borrow<Texture> for SurfaceTexture {
    /// Expose the underlying swapchain texture.
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}
531
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    /// Expose the underlying swapchain texture, type-erased.
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
537
/// A physical device together with the capability information gathered
/// from it at enumeration time.
pub struct Adapter {
    /// The raw physical device handle.
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// Memory property flags the backend knows how to handle.
    known_memory_flags: vk::MemoryPropertyFlags,
    /// Properties queried from the physical device.
    phd_capabilities: adapter::PhysicalDeviceProperties,
    /// Features queried from the physical device.
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capability flags; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds to apply; see [`Workarounds`].
    workarounds: Workarounds,
}
549
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// How an extension's entry points are reachable on this device.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
557
/// Device-level extension function tables; each is `Some` only when the
/// corresponding extension was loaded for this device.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// Wrapped in [`ExtensionFn`] because the extension may instead be
    /// promoted to core; see the module docs on fences.
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
565
/// Function tables for the extensions ray tracing depends on; loaded
/// together since both are required for acceleration-structure support.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
570
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    // Whether usage flags can be attached to individual image views —
    // NOTE(review): presumably `VK_KHR_maintenance2`; confirm against adapter.rs.
    image_view_usage: bool,
    /// True if timeline semaphores are available; see the module docs on
    /// how fences are implemented either way.
    timeline_semaphores: bool,
    /// 24-bit depth texture format supported.
    texture_d24: bool,
    /// Combined 24-bit depth + 8-bit stencil texture format supported.
    texture_d24_s8: bool,
    /// Stencil-only 8-bit texture format supported.
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    // Alignment mask for mapped ranges of non-coherent memory —
    // presumably derived from `nonCoherentAtomSize`; verify.
    non_coherent_map_mask: wgt::BufferAddress,
    /// Whether multiple indirect draws can be issued in one call.
    multi_draw_indirect: bool,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// Image counterpart of [`Self::robust_buffer_access`].
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// Image counterpart of [`Self::robust_buffer_access2`].
    robust_image_access2: bool,
    /// Whether the driver zero-initializes workgroup memory for us.
    zero_initialize_workgroup_memory: bool,
    /// Whether `VK_KHR_image_format_list` (or its core promotion) is available.
    image_format_list: bool,
    // Cap on the number of live samplers — presumably
    // `maxSamplerAllocationCount`; used by the sampler cache. Confirm.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
642
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// NOTE(review): these appear to be detected per-adapter (see the
    /// `workarounds` fields on `Adapter`/`DeviceShared`); confirm where
    /// each bit is set.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
676
/// Piece of a render-pass cache key describing one attachment.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    /// Load/store operations for the attachment.
    ops: crate::AttachmentOps,
}
683
684impl AttachmentKey {
685 /// Returns an attachment key for a compatible attachment.
686 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
687 Self {
688 format,
689 layout,
690 ops: crate::AttachmentOps::all(),
691 }
692 }
693}
694
/// Render-pass cache key piece for one color attachment and its
/// optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}
700
/// Render-pass cache key piece for the depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    /// Stencil aspect load/store ops (the depth aspect's ops live in `base`).
    stencil_ops: crate::AttachmentOps,
}
706
/// Key identifying a cached `VkRenderPass`; see the module docs — render
/// passes are cached on the device and kept forever.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
714
/// Device state shared between the [`Device`], the [`Queue`], and the
/// resources created from them.
struct DeviceShared {
    /// The raw `ash` device handle.
    raw: ash::Device,
    /// Queue family the device's queue belongs to.
    family_index: u32,
    /// Index of the queue within its family.
    queue_index: u32,
    /// The raw queue handle for that (family, index) pair.
    raw_queue: vk::Queue,
    /// If `Some`, the raw device is managed elsewhere and is not destroyed
    /// when this struct drops (see `DeviceShared::drop`).
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    /// Names of the device extensions enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    // NOTE(review): presumably the adapter's PCI vendor id — confirm.
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    // Nanoseconds per timestamp-query tick — assumed to come from the
    // `timestampPeriod` device limit; confirm against adapter.rs.
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render-pass cache; entries live until the device is dropped
    /// (see the module docs and `DeviceShared::drop`).
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted from until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
743
impl Drop for DeviceShared {
    fn drop(&mut self) {
        // Render passes are cached for the device's lifetime (see the
        // module docs), so this is the only place they are destroyed.
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        // Only destroy the device when we own it; a `drop_guard` indicates
        // the raw handle is managed elsewhere.
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}
754
/// The Vulkan backend's `Device` type: shared device state plus the
/// memory and descriptor allocators built on top of it.
pub struct Device {
    shared: Arc<DeviceShared>,
    /// GPU memory allocator; cleaned up in `Drop`.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Descriptor pool/set allocator; cleaned up in `Drop`.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// Bitmask of memory types usable through `gpu_alloc`.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend when compiling shaders for this device.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
766
impl Drop for Device {
    fn drop(&mut self) {
        // SAFETY: both allocators were created against `self.shared` and
        // are cleaned up exactly once, here, while the device is still alive.
        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}
773
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
807
808impl RelaySemaphores {
809 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
810 Ok(Self {
811 wait: None,
812 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
813 })
814 }
815
816 /// Advances the semaphores, returning the semaphores that should be used for a submission.
817 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
818 let old = self.clone();
819
820 // Build the state for the next submission.
821 match self.wait {
822 None => {
823 // The `old` values describe the first submission to this queue.
824 // The second submission should wait on `old.signal`, and then
825 // signal a new semaphore which we'll create now.
826 self.wait = Some(old.signal);
827 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
828 }
829 Some(ref mut wait) => {
830 // What this submission signals, the next should wait.
831 mem::swap(wait, &mut self.signal);
832 }
833 };
834
835 Ok(old)
836 }
837
838 /// Destroys the semaphores.
839 unsafe fn destroy(&self, device: &ash::Device) {
840 unsafe {
841 if let Some(wait) = self.wait {
842 device.destroy_semaphore(wait, None);
843 }
844 device.destroy_semaphore(self.signal, None);
845 }
846 }
847}
848
pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Semaphore pair used to strictly order successive submissions on this
    /// queue; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores to signal on the next submission, queued up via
    /// [`Queue::add_signal_semaphore`] and drained by `submit`.
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        // Destroy the relay semaphores. `RelaySemaphores::destroy` requires
        // that the semaphores are no longer in use; presumably all
        // submissions have completed by the time the queue is dropped —
        // NOTE(review): confirm against wgpu-hal's drop-ordering rules.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
869#[derive(Debug)]
870enum BufferMemoryBacking {
871 Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
872 VulkanMemory {
873 memory: vk::DeviceMemory,
874 offset: u64,
875 size: u64,
876 },
877}
878impl BufferMemoryBacking {
879 fn memory(&self) -> &vk::DeviceMemory {
880 match self {
881 Self::Managed(m) => m.memory(),
882 Self::VulkanMemory { memory, .. } => memory,
883 }
884 }
885 fn offset(&self) -> u64 {
886 match self {
887 Self::Managed(m) => m.offset(),
888 Self::VulkanMemory { offset, .. } => *offset,
889 }
890 }
891 fn size(&self) -> u64 {
892 match self {
893 Self::Managed(m) => m.size(),
894 Self::VulkanMemory { size, .. } => *size,
895 }
896 }
897}
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Memory backing this buffer; `None` when the buffer was imported via
    /// [`Buffer::from_raw`] and its memory is owned by the caller.
    block: Option<Mutex<BufferMemoryBacking>>,
}
impl Buffer {
    /// Wraps an externally created `vk::Buffer` whose memory stays under the
    /// caller's control.
    ///
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            block: None,
        }
    }
    /// Wraps an externally created `vk::Buffer` together with the device
    /// memory range backing it, which wgpu will treat as exclusively its own.
    ///
    /// # Safety
    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid with the allocation of `memory`
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}
936
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    /// Buffer that holds the acceleration structure's data.
    buffer: vk::Buffer,
    /// Memory allocation backing `buffer`.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Query pool for reading back the compacted size — presumably only
    /// present when compaction was requested; confirm against `device.rs`.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
946
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// Guard present when the image handle is owned externally — presumably
    /// suppresses destruction of `raw` on drop; confirm in `device.rs`.
    drop_guard: Option<crate::DropGuard>,
    /// Imported external memory bound to this image, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory allocation backing `raw`, when wgpu allocated it.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Process-unique identity: `vk::Image` handles may be reused by the
    /// driver, so caches key on this instead (see [`TempTextureViewKey`]).
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// # Safety
    ///
    /// - The external memory must not be manually freed
    pub unsafe fn external_memory(&self) -> Option<vk::DeviceMemory> {
        self.external_memory
    }
}
975
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the parent texture (see [`ResourceIdentity`]).
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of this view itself, used in framebuffer cache keys
    /// (see [`FramebufferKey`]).
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}
1007
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    /// The create info this sampler was built from — presumably kept so
    /// equivalent samplers can be recognized; confirm against `sampler.rs`.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// The (possibly remapped) binding index within the descriptor set.
    binding: u32,
    /// Size of the binding array, when this binding is an array.
    binding_array_size: Option<NonZeroU32>,
}
1024
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Descriptor totals needed to allocate sets of this layout from the
    /// `gpu_descriptor` allocator.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    /// Whether any entry uses a binding array — NOTE(review): inferred from
    /// the name; confirm in `device.rs`.
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding map handed to naga's SPIR-V backend when compiling shaders
    /// against this layout.
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    /// Descriptor set allocated through the `gpu_descriptor` allocator.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1053
1054/// Miscellaneous allocation recycling pool for `CommandAllocator`.
1055#[derive(Default)]
1056struct Temp {
1057 marker: Vec<u8>,
1058 buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
1059 image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
1060}
1061
1062impl Temp {
1063 fn clear(&mut self) {
1064 self.marker.clear();
1065 self.buffer_barriers.clear();
1066 self.image_barriers.clear();
1067 }
1068
1069 fn make_c_str(&mut self, name: &str) -> &CStr {
1070 self.marker.clear();
1071 self.marker.extend_from_slice(name.as_bytes());
1072 self.marker.push(0);
1073 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1074 }
1075}
1076
/// Generates unique IDs for each resource of type `T`.
///
/// Vulkan handles may be reused by the driver after destruction, so a raw
/// handle alone is not permanently unique; this factory hands out IDs that
/// are unique for the lifetime of the program.
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID is 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        // Both cfg branches produce the same post-increment behavior; only
        // the synchronization primitive differs.
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let current = *counter;
            *counter += 1;
            current
        };

        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
1132
/// Key identifying a cached framebuffer.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    /// Appends `view` to the key, recording both its identity (for hashing)
    /// and its raw handle (for creating the framebuffer).
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}

/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

/// Key identifying a cached temporary texture view.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
1172
pub struct CommandEncoder {
    /// Command pool that `active`, `free`, and `discarded` buffers are
    /// allocated from; destroying it frees them all (see the `Drop` impl).
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers created by this encoder; destroyed when it is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views created by this encoder; destroyed when it is
    /// dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared counters; `command_encoders` is decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
1214
1215impl Drop for CommandEncoder {
1216 fn drop(&mut self) {
1217 // SAFETY:
1218 //
1219 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1220 // `CommandBuffer` must live until its execution is complete, and that a
1221 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1222 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1223 // state.
1224 //
1225 // The other VUIDs are pretty obvious.
1226 unsafe {
1227 // `vkDestroyCommandPool` also frees any command buffers allocated
1228 // from that pool, so there's no need to explicitly call
1229 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1230 // fields.
1231 self.device.raw.destroy_command_pool(self.raw, None);
1232 }
1233
1234 for (_, fb) in self.framebuffers.drain() {
1235 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1236 }
1237
1238 for (_, view) in self.temp_texture_views.drain() {
1239 unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1240 }
1241
1242 self.counters.command_encoders.sub(1);
1243 }
1244}
1245
impl CommandEncoder {
    /// Returns the raw handle of the currently active command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    // Only the pool handle is shown; the remaining fields are caches and
    // pools with no useful `Debug` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}
1262
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An already-compiled `vk::ShaderModule`.
    Raw(vk::ShaderModule),
    /// Shader kept as naga IR — presumably translated to SPIR-V later, when
    /// the full pipeline layout is known; confirm in `device.rs`.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1309
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1360
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            // The timeline semaphore's counter value is the fence value itself.
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                // Remember how many fences were already in `free`, so that we
                // reset only the fences recycled by this call.
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    // Signalled fences must be reset before they can be reused.
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1454
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Append any extra user-provided semaphores that were queued up via
        // `Queue::add_signal_semaphore`.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if one is available; otherwise
                // create a fresh one for this submission.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // `add_to_submit` may chain a timeline-semaphore info struct onto
        // `vk_info`, so the storage must outlive the submit call below.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and early outs, we have reset
        // the state for next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        // The acquire lock is no longer needed; release it before presenting.
        drop(acquire_semaphore);

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(&wait_semaphores);

        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        // Nanoseconds per timestamp-query tick, as cached on the device.
        self.device.timestamp_period
    }
}
1656
impl Queue {
    /// Returns the raw `ash` device this queue was created from.
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

    /// Queues `semaphore` to be signaled by the next `submit` on this queue.
    ///
    /// A `semaphore_value` of `Some` treats it as a timeline-semaphore
    /// signal with that value; `None` treats it as a binary semaphore.
    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_timeline(semaphore, value);
        } else {
            guard.push_binary(semaphore);
        }
    }
}
1671
1672/// Maps
1673///
1674/// - VK_ERROR_OUT_OF_HOST_MEMORY
1675/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1676fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1677 match err {
1678 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1679 get_oom_err(err)
1680 }
1681 e => get_unexpected_err(e),
1682 }
1683}
1684
1685/// Maps
1686///
1687/// - VK_ERROR_OUT_OF_HOST_MEMORY
1688/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1689/// - VK_ERROR_DEVICE_LOST
1690fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1691 match err {
1692 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1693 other => map_host_device_oom_err(other),
1694 }
1695}
1696
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so the "invalid opaque
    // capture address" case cannot occur and plain OOM mapping suffices.
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so the "invalid opaque
    // capture address" case cannot occur and plain OOM mapping suffices.
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1751
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // Reachable only when the panic above is compiled out.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // Reachable only when the panic above is compiled out.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1776
/// Raw TLAS instance record, written directly into GPU-visible memory.
///
/// NOTE(review): the field widths match Vulkan's
/// `VkAccelerationStructureInstanceKHR` (12 floats, two packed 32-bit
/// fields, one 64-bit reference); confirm the bit packing of the two `u32`
/// fields against the spec. `repr(C)` + `Pod` mean the layout must not be
/// reordered.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}
1785
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info's extension list will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1817
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info's extension list will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;