wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement through a combination of the following approaches:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they contain gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod descriptor;
31mod device;
32mod drm;
33mod instance;
34mod sampler;
35mod semaphore_list;
36mod swapchain;
37
38pub use adapter::PhysicalDeviceFeatures;
39
40use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
41use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
42
43use arrayvec::ArrayVec;
44use ash::{ext, khr, vk};
45use bytemuck::{Pod, Zeroable};
46use hashbrown::HashSet;
47use parking_lot::{Mutex, RwLock};
48
49use naga::FastHashMap;
50use wgt::InternalCounter;
51
52use semaphore_list::SemaphoreList;
53
54use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
55
56const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
57
/// Zero-sized marker type that identifies the Vulkan backend.
#[derive(Debug, Clone)]
pub struct Api;
60
61impl crate::Api for Api {
62 const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
63
64 type Instance = Instance;
65 type Surface = Surface;
66 type Adapter = Adapter;
67 type Device = Device;
68
69 type Queue = Queue;
70 type CommandEncoder = CommandEncoder;
71 type CommandBuffer = CommandBuffer;
72
73 type Buffer = Buffer;
74 type Texture = Texture;
75 type SurfaceTexture = SurfaceTexture;
76 type TextureView = TextureView;
77 type Sampler = Sampler;
78 type QuerySet = QuerySet;
79 type Fence = Fence;
80 type AccelerationStructure = AccelerationStructure;
81 type PipelineCache = PipelineCache;
82
83 type BindGroupLayout = BindGroupLayout;
84 type BindGroup = BindGroup;
85 type PipelineLayout = PipelineLayout;
86 type ShaderModule = ShaderModule;
87 type RenderPipeline = RenderPipeline;
88 type ComputePipeline = ComputePipeline;
89 type RayTracingPipeline = RayTracingPipeline;
90}
91
92crate::impl_dyn_resource!(
93 Adapter,
94 AccelerationStructure,
95 BindGroup,
96 BindGroupLayout,
97 Buffer,
98 CommandBuffer,
99 CommandEncoder,
100 ComputePipeline,
101 Device,
102 Fence,
103 Instance,
104 PipelineCache,
105 PipelineLayout,
106 QuerySet,
107 Queue,
108 RenderPipeline,
109 RayTracingPipeline,
110 Sampler,
111 ShaderModule,
112 Surface,
113 SurfaceTexture,
114 Texture,
115 TextureView
116);
117
118struct DebugUtils {
119 extension: ext::debug_utils::Instance,
120 messenger: vk::DebugUtilsMessengerEXT,
121
122 /// Owning pointer to the debug messenger callback user data.
123 ///
124 /// `InstanceShared::drop` destroys the debug messenger before
125 /// dropping this, so the callback should never receive a dangling
126 /// user data pointer.
127 #[allow(dead_code)]
128 callback_data: Box<DebugUtilsMessengerUserData>,
129}
130
131#[derive(Debug)]
132pub struct DebugUtilsCreateInfo {
133 severity: vk::DebugUtilsMessageSeverityFlagsEXT,
134 message_type: vk::DebugUtilsMessageTypeFlagsEXT,
135 callback_data: Box<DebugUtilsMessengerUserData>,
136}
137
/// Validation layer properties that the debug-utils messenger callback needs
/// for its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Description of the validation layer, taken from `vk::LayerProperties`.
    layer_description: CString,

    /// Specification version of the validation layer, taken from
    /// `vk::LayerProperties`.
    layer_spec_version: u32,
}
148
149/// User data needed by `instance::debug_utils_messenger_callback`.
150///
151/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
152/// pointer refers to one of these values.
153#[derive(Debug)]
154pub struct DebugUtilsMessengerUserData {
155 /// The properties related to the validation layer, if present
156 validation_layer_properties: Option<ValidationLayerProperties>,
157
158 /// If the OBS layer is present. OBS never increments the version of their layer,
159 /// so there's no reason to have the version.
160 has_obs_layer: bool,
161}
162
163pub struct InstanceShared {
164 raw: ash::Instance,
165 extensions: Vec<&'static CStr>,
166 flags: wgt::InstanceFlags,
167 memory_budget_thresholds: wgt::MemoryBudgetThresholds,
168 debug_utils: Option<DebugUtils>,
169 get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
170 entry: ash::Entry,
171 has_nv_optimus: bool,
172 android_sdk_version: u32,
173 /// The instance API version.
174 ///
175 /// Which is the version of Vulkan supported for instance-level functionality.
176 ///
177 /// It is associated with a `VkInstance` and its children,
178 /// except for a `VkPhysicalDevice` and its children.
179 instance_api_version: u32,
180
181 // The `drop_guard` field must be the last field of this struct so it is dropped last.
182 // Do not add new fields after it.
183 drop_guard: Option<crate::DropGuard>,
184}
185
186impl fmt::Debug for InstanceShared {
187 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188 let Self {
189 raw: _,
190 extensions,
191 flags,
192 memory_budget_thresholds,
193 debug_utils: _,
194 get_physical_device_properties: _,
195 entry: _,
196 has_nv_optimus,
197 android_sdk_version,
198 instance_api_version,
199 drop_guard: _,
200 } = self;
201 f.debug_struct("InstanceShared")
202 .field("extensions", extensions)
203 .field("flags", flags)
204 .field("memory_budget_thresholds", memory_budget_thresholds)
205 .field("has_nv_optimus", has_nv_optimus)
206 .field("android_sdk_version", android_sdk_version)
207 .field("instance_api_version", instance_api_version)
208 .finish_non_exhaustive()
209 }
210}
211
212#[derive(Debug)]
213pub struct Instance {
214 shared: Arc<InstanceShared>,
215}
216
217#[expect(missing_debug_implementations, reason = "TODO?")]
218pub struct Surface {
219 swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
220 inner: Box<dyn swapchain::Surface>,
221}
222
223impl Surface {
224 /// Returns the raw Vulkan surface handle.
225 ///
226 /// Returns `None` if the surface is a DXGI surface.
227 pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
228 Some(
229 self.inner
230 .as_any()
231 .downcast_ref::<swapchain::NativeSurface>()?
232 .as_raw(),
233 )
234 }
235
236 /// Get the raw Vulkan swapchain associated with this surface.
237 ///
238 /// Returns [`None`] if the surface is not configured or if the swapchain
239 /// is a DXGI swapchain.
240 pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
241 let read = self.swapchain.read();
242 Some(
243 read.as_ref()?
244 .as_any()
245 .downcast_ref::<swapchain::NativeSwapchain>()?
246 .as_raw(),
247 )
248 }
249
250 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
251 /// using [VK_GOOGLE_display_timing].
252 ///
253 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
254 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
255 ///
256 /// This can also be used to add a "not before" timestamp to the presentation.
257 ///
258 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
259 ///
260 /// # Panics
261 ///
262 /// - If the surface hasn't been configured.
263 /// - If the surface has been configured for a DXGI swapchain.
264 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
265 ///
266 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
267 #[track_caller]
268 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
269 let mut swapchain = self.swapchain.write();
270 swapchain
271 .as_mut()
272 .expect("Surface should have been configured")
273 .as_any_mut()
274 .downcast_mut::<swapchain::NativeSwapchain>()
275 .expect("Surface should have a native Vulkan swapchain")
276 .set_next_present_time(present_timing);
277 }
278}
279
280#[derive(Debug)]
281pub struct SurfaceTexture {
282 index: u32,
283 texture: Texture,
284 metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
285}
286
287impl crate::DynSurfaceTexture for SurfaceTexture {}
288
289impl Borrow<Texture> for SurfaceTexture {
290 fn borrow(&self) -> &Texture {
291 &self.texture
292 }
293}
294
295impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
296 fn borrow(&self) -> &dyn crate::DynTexture {
297 &self.texture
298 }
299}
300
301#[derive(Debug)]
302pub struct Adapter {
303 raw: vk::PhysicalDevice,
304 instance: Arc<InstanceShared>,
305 //queue_families: Vec<vk::QueueFamilyProperties>,
306 known_memory_flags: vk::MemoryPropertyFlags,
307 phd_capabilities: adapter::PhysicalDeviceProperties,
308 phd_features: PhysicalDeviceFeatures,
309 downlevel_flags: wgt::DownlevelFlags,
310 private_caps: PrivateCapabilities,
311 workarounds: Workarounds,
312}
313
// TODO: these two cases could in principle be unified, since the function
// pointers should all be the same; it is just not clear how to express that
// with `ash`.
/// Where the entry points of an extension come from.
enum ExtensionFn<T> {
    /// The function pointer struct loaded for the extension.
    Extension(T),
    /// The extension has been promoted to a core Vulkan version, so the
    /// functions on `ash`'s `DeviceV1_x` traits should be used instead.
    Promoted,
}
321
322struct DeviceExtensionFunctions {
323 debug_utils: Option<ext::debug_utils::Device>,
324 draw_indirect_count: Option<khr::draw_indirect_count::Device>,
325 timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
326 ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
327 ray_tracing_pipelines: Option<khr::ray_tracing_pipeline::Device>,
328 mesh_shading: Option<ext::mesh_shader::Device>,
329 #[cfg_attr(not(unix), allow(dead_code))]
330 external_memory_fd: Option<khr::external_memory_fd::Device>,
331}
332
333struct RayTracingDeviceExtensionFunctions {
334 acceleration_structure: khr::acceleration_structure::Device,
335 buffer_device_address: khr::buffer_device_address::Device,
336}
337
338/// Set of internal capabilities, which don't show up in the exposed
339/// device geometry, but affect the code paths taken internally.
340#[derive(Clone, Debug)]
341struct PrivateCapabilities {
342 image_view_usage: bool,
343 timeline_semaphores: bool,
344 texture_d24: bool,
345 texture_d24_s8: bool,
346 texture_s8: bool,
347 /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
348 can_present: bool,
349 non_coherent_map_mask: wgt::BufferAddress,
350 multi_draw_indirect: bool,
351 max_draw_indirect_count: u32,
352
353 /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
354 ///
355 /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
356 /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
357 /// a given bindgroup binding outside that binding's [accessible
358 /// region][ar]. Enabling `robustBufferAccess` does ensure that
359 /// out-of-bounds reads and writes are not undefined behavior (that's good),
360 /// but still permits out-of-bounds reads to return data from anywhere
361 /// within the buffer, not just the accessible region.
362 ///
363 /// [ar]: ../struct.BufferBinding.html#accessible-region
364 /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
365 robust_buffer_access: bool,
366
367 robust_image_access: bool,
368
369 /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
370 /// [`robustBufferAccess2`] feature.
371 ///
372 /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
373 /// shader accesses to buffer contents. If this feature is not available,
374 /// this backend must have Naga inject bounds checks in the generated
375 /// SPIR-V.
376 ///
377 /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
378 /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
379 /// [ar]: ../struct.BufferBinding.html#accessible-region
380 robust_buffer_access2: bool,
381
382 robust_image_access2: bool,
383 zero_initialize_workgroup_memory: bool,
384 image_format_list: bool,
385 maximum_samplers: u32,
386
387 /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
388 /// (promoted to Vulkan 1.3).
389 ///
390 /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
391 ///
392 /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
393 shader_integer_dot_product: bool,
394
395 /// True if this adapter supports 8-bit integers provided by the
396 /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
397 ///
398 /// Allows shaders to declare the "Int8" capability. Note, however, that this
399 /// feature alone allows the use of 8-bit integers "only in the `Private`,
400 /// `Workgroup` (for non-Block variables), and `Function` storage classes"
401 /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
402 /// `StorageBuffer`), you also need to enable the corresponding feature in
403 /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
404 /// capability (e.g., `StorageBuffer8BitAccess`).
405 ///
406 /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
407 /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
408 shader_int8: bool,
409
410 /// This is done to panic before undefined behavior, and is imperfect.
411 /// Basically, to allow implementations to emulate mv using instancing, if you
412 /// want to draw `n` instances to VR, you must draw `2n` instances, but you
413 /// can never draw more than `u32::MAX` instances. Therefore, when drawing
414 /// multiview on some vulkan implementations, it might restrict the instance
415 /// count, which isn't usually a thing in webgpu. We don't expose this limit
416 /// because its strange, i.e. only occurs on certain vulkan implementations
417 /// if you are drawing more than 128 million instances. We still want to avoid
418 /// undefined behavior in this situation, so we panic if the limit is violated.
419 multiview_instance_index_limit: u32,
420
421 /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
422 /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
423 /// these usages do not have as high of an alignment requirement using the buffer as
424 /// a scratch buffer when building acceleration structures.
425 scratch_buffer_alignment: u32,
426
427 /// `get_raytracing_pipeline_group_data` requires both a group count and a data size.
428 /// The data size parameter is just this * the group count, so we store this to not
429 /// require an unnecessary parameter.
430 ray_tracing_pipeline_group_data_size: u32,
431}
432
433bitflags::bitflags!(
434 /// Workaround flags.
435 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
436 pub struct Workarounds: u32 {
437 /// Only generate SPIR-V for one entry point at a time.
438 const SEPARATE_ENTRY_POINTS = 0x1;
439 /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
440 /// to a subpass resolve attachment array. This nulls out that pointer in that case.
441 const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
442 /// If the following code returns false, then nvidia will end up filling the wrong range.
443 ///
444 /// ```skip
445 /// fn nvidia_succeeds() -> bool {
446 /// # let (copy_length, start_offset) = (0, 0);
447 /// if copy_length >= 4096 {
448 /// if start_offset % 16 != 0 {
449 /// if copy_length == 4096 {
450 /// return true;
451 /// }
452 /// if copy_length % 16 == 0 {
453 /// return false;
454 /// }
455 /// }
456 /// }
457 /// true
458 /// }
459 /// ```
460 ///
461 /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
462 /// if they cover a range of 4096 bytes or more.
463 const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
464 }
465);
466
467#[derive(Clone, Debug, Eq, Hash, PartialEq)]
468struct AttachmentKey {
469 format: vk::Format,
470 layout: vk::ImageLayout,
471 ops: crate::AttachmentOps,
472}
473
474impl AttachmentKey {
475 /// Returns an attachment key for a compatible attachment.
476 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
477 Self {
478 format,
479 layout,
480 ops: crate::AttachmentOps::all(),
481 }
482 }
483}
484
485#[derive(Clone, Eq, Hash, PartialEq)]
486struct ColorAttachmentKey {
487 base: AttachmentKey,
488 resolve: Option<AttachmentKey>,
489}
490
491#[derive(Clone, Eq, Hash, PartialEq)]
492struct DepthStencilAttachmentKey {
493 base: AttachmentKey,
494 stencil_ops: crate::AttachmentOps,
495}
496
497#[derive(Clone, Eq, Default, Hash, PartialEq)]
498struct RenderPassKey {
499 colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
500 depth_stencil: Option<DepthStencilAttachmentKey>,
501 sample_count: u32,
502 multiview_mask: Option<NonZeroU32>,
503}
504
505struct DeviceShared {
506 raw: ash::Device,
507 family_index: u32,
508 queue_index: u32,
509 raw_queue: vk::Queue,
510 instance: Arc<InstanceShared>,
511 physical_device: vk::PhysicalDevice,
512 enabled_extensions: Vec<&'static CStr>,
513 extension_fns: DeviceExtensionFunctions,
514 vendor_id: u32,
515 pipeline_cache_validation_key: [u8; 16],
516 timestamp_period: f32,
517 private_caps: PrivateCapabilities,
518 workarounds: Workarounds,
519 features: wgt::Features,
520 render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
521 sampler_cache: Mutex<sampler::SamplerCache>,
522 memory_allocations_counter: InternalCounter,
523
524 /// Because we have cached framebuffers which are not deleted from until
525 /// the device is destroyed, if the implementation of vulkan re-uses handles
526 /// we need some way to differentiate between the old handle and the new handle.
527 /// This factory allows us to have a dedicated identity value for each texture.
528 texture_identity_factory: ResourceIdentityFactory<vk::Image>,
529 /// As above, for texture views.
530 texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
531
532 empty_descriptor_set_layout: vk::DescriptorSetLayout,
533
534 // The `drop_guard` field must be the last field of this struct so it is dropped last.
535 // Do not add new fields after it.
536 drop_guard: Option<crate::DropGuard>,
537}
538
539impl Drop for DeviceShared {
540 fn drop(&mut self) {
541 for &raw in self.render_passes.lock().values() {
542 unsafe { self.raw.destroy_render_pass(raw, None) };
543 }
544 unsafe {
545 self.raw
546 .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
547 };
548 if self.drop_guard.is_none() {
549 unsafe { self.raw.destroy_device(None) };
550 }
551 }
552}
553
554#[expect(
555 missing_debug_implementations,
556 reason = "needs work to not be disastrously verbose"
557)]
558pub struct Device {
559 mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
560 desc_allocator: Mutex<descriptor::DescriptorAllocator>,
561 valid_ash_memory_types: u32,
562 naga_options: naga::back::spv::Options<'static>,
563 #[cfg(feature = "renderdoc")]
564 render_doc: crate::auxil::renderdoc::RenderDoc,
565 counters: Arc<wgt::HalCounters>,
566 // Struct members are dropped from first to last, put the Device last to ensure that
567 // all resources that depends on it are destroyed before it like the mem_allocator
568 shared: Arc<DeviceShared>,
569}
570
571impl Drop for Device {
572 fn drop(&mut self) {}
573}
574
575/// Semaphores for forcing queue submissions to run in order.
576///
577/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
578/// ordered, then the first submission will finish on the GPU before the second
579/// submission begins. To get this behavior on Vulkan we need to pass semaphores
580/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
581/// and to signal when their execution is done.
582///
583/// Normally this can be done with a single semaphore, waited on and then
584/// signalled for each submission. At any given time there's exactly one
585/// submission that would signal the semaphore, and exactly one waiting on it,
586/// as Vulkan requires.
587///
588/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
589/// hang if we use a single semaphore. The workaround is to alternate between
590/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
591/// the workaround until, say, Oct 2026.
592///
593/// [`wgpu_hal::Queue`]: crate::Queue
594/// [`submit`]: crate::Queue::submit
595/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
596/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
597#[derive(Clone)]
598struct RelaySemaphores {
599 /// The semaphore the next submission should wait on before beginning
600 /// execution on the GPU. This is `None` for the first submission, which
601 /// should not wait on anything at all.
602 wait: Option<vk::Semaphore>,
603
604 /// The semaphore the next submission should signal when it has finished
605 /// execution on the GPU.
606 signal: vk::Semaphore,
607}
608
609impl RelaySemaphores {
610 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
611 Ok(Self {
612 wait: None,
613 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
614 })
615 }
616
617 /// Advances the semaphores, returning the semaphores that should be used for a submission.
618 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
619 let old = self.clone();
620
621 // Build the state for the next submission.
622 match self.wait {
623 None => {
624 // The `old` values describe the first submission to this queue.
625 // The second submission should wait on `old.signal`, and then
626 // signal a new semaphore which we'll create now.
627 self.wait = Some(old.signal);
628 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
629 }
630 Some(ref mut wait) => {
631 // What this submission signals, the next should wait.
632 mem::swap(wait, &mut self.signal);
633 }
634 };
635
636 Ok(old)
637 }
638
639 /// Destroys the semaphores.
640 unsafe fn destroy(&self, device: &ash::Device) {
641 unsafe {
642 if let Some(wait) = self.wait {
643 device.destroy_semaphore(wait, None);
644 }
645 device.destroy_semaphore(self.signal, None);
646 }
647 }
648}
649
650pub struct Queue {
651 raw: vk::Queue,
652 device: Arc<DeviceShared>,
653 family_index: u32,
654 relay_semaphores: Mutex<RelaySemaphores>,
655 signal_semaphores: Mutex<SemaphoreList>,
656 wait_semaphores: Mutex<SemaphoreList>,
657}
658
659impl fmt::Debug for Queue {
660 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
661 let Self {
662 raw: _,
663 device: _,
664 family_index,
665 relay_semaphores: _,
666 signal_semaphores: _,
667 wait_semaphores: _,
668 } = self;
669 f.debug_struct("Queue")
670 .field("family_index", family_index)
671 .finish_non_exhaustive()
672 }
673}
674
675impl Queue {
676 pub fn as_raw(&self) -> vk::Queue {
677 self.raw
678 }
679}
680
681impl Drop for Queue {
682 fn drop(&mut self) {
683 unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
684 }
685}
686#[derive(Debug)]
687enum BufferMemoryBacking {
688 Managed(gpu_allocator::vulkan::Allocation),
689 VulkanMemory {
690 memory: vk::DeviceMemory,
691 offset: u64,
692 size: u64,
693 },
694}
695impl BufferMemoryBacking {
696 fn memory(&self) -> vk::DeviceMemory {
697 match self {
698 Self::Managed(m) => unsafe { m.memory() },
699 Self::VulkanMemory { memory, .. } => *memory,
700 }
701 }
702 fn offset(&self) -> u64 {
703 match self {
704 Self::Managed(m) => m.offset(),
705 Self::VulkanMemory { offset, .. } => *offset,
706 }
707 }
708 fn size(&self) -> u64 {
709 match self {
710 Self::Managed(m) => m.size(),
711 Self::VulkanMemory { size, .. } => *size,
712 }
713 }
714}
715/// Describes who owns a [`Buffer`]'s `vk::Buffer` handle and its backing memory,
716/// and therefore what cleanup is required when the buffer is destroyed.
717#[derive(Debug)]
718enum BufferOwnership {
719 /// wgpu-hal owns the `vk::Buffer` and its backing memory. On cleanup the buffer
720 /// handle is destroyed and the memory is released.
721 Managed(Mutex<BufferMemoryBacking>),
722 /// wgpu-hal owns the `vk::Buffer` handle but the backing memory is kept alive
723 /// by the caller. On cleanup only the buffer handle is destroyed.
724 RawHandle,
725 /// Caller owns the `vk::Buffer` and its backing memory. On cleanup the
726 /// [`crate::DropGuard`] runs the caller's cleanup callback and wgpu-hal touches
727 /// neither the handle nor the memory.
728 External(crate::DropGuard),
729}
730
731#[derive(Debug)]
732pub struct Buffer {
733 raw: vk::Buffer,
734
735 // This field must be last, because it may contain a `DropGuard` which needs to be dropped after all other fields.
736 ownership: BufferOwnership,
737}
738impl Buffer {
739 /// # Safety
740 ///
741 /// - `vk_buffer`'s memory must be managed by the caller
742 /// - Externally imported buffers can't be mapped by `wgpu`
743 pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
744 Self {
745 raw: vk_buffer,
746 ownership: BufferOwnership::RawHandle,
747 }
748 }
749
750 /// # Safety
751 /// - `vk_buffer` must outlive the returned `Buffer`.
752 /// - wgpu-hal will NOT call `vkDestroyBuffer`; the caller remains responsible for the buffer handle's destruction.
753 /// The `drop_callback` runs when the `Buffer` drops and may be used to release caller-side bookkeeping.
754 /// - Externally imported buffers can't be mapped by `wgpu`.
755 pub unsafe fn from_raw_externally_owned(
756 vk_buffer: vk::Buffer,
757 drop_callback: crate::DropCallback,
758 ) -> Self {
759 Self {
760 raw: vk_buffer,
761 ownership: BufferOwnership::External(crate::DropGuard::new(drop_callback)),
762 }
763 }
764
765 /// # Safety
766 /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
767 /// - Externally imported buffers can't be mapped by `wgpu`
768 /// - `offset` and `size` must be valid with the allocation of `memory`
769 pub unsafe fn from_raw_managed(
770 vk_buffer: vk::Buffer,
771 memory: vk::DeviceMemory,
772 offset: u64,
773 size: u64,
774 ) -> Self {
775 Self {
776 raw: vk_buffer,
777 ownership: BufferOwnership::Managed(Mutex::new(BufferMemoryBacking::VulkanMemory {
778 memory,
779 offset,
780 size,
781 })),
782 }
783 }
784
785 /// # Safety
786 /// - The buffer handle must not be manually destroyed
787 pub unsafe fn raw_handle(&self) -> vk::Buffer {
788 self.raw
789 }
790}
791
792impl crate::DynBuffer for Buffer {}
793
794#[derive(Debug)]
795pub struct AccelerationStructure {
796 raw: vk::AccelerationStructureKHR,
797 buffer: vk::Buffer,
798 allocation: gpu_allocator::vulkan::Allocation,
799 compacted_size_query: Option<vk::QueryPool>,
800}
801
802impl crate::DynAccelerationStructure for AccelerationStructure {}
803
804#[derive(Debug)]
805pub enum TextureMemory {
806 // shared memory in GPU allocator (owned by wgpu-hal)
807 Allocation(gpu_allocator::vulkan::Allocation),
808
809 // dedicated memory (owned by wgpu-hal)
810 Dedicated(vk::DeviceMemory),
811
812 // memory not owned by wgpu
813 External,
814}
815
816#[derive(Debug)]
817pub struct Texture {
818 raw: vk::Image,
819 memory: TextureMemory,
820 format: wgt::TextureFormat,
821 copy_size: crate::CopyExtent,
822 identity: ResourceIdentity<vk::Image>,
823
824 // The `drop_guard` field must be the last field of this struct so it is dropped last.
825 // Do not add new fields after it.
826 drop_guard: Option<crate::DropGuard>,
827}
828
829impl crate::DynTexture for Texture {}
830
831impl Texture {
832 /// # Safety
833 ///
834 /// - The image handle must not be manually destroyed
835 pub unsafe fn raw_handle(&self) -> vk::Image {
836 self.raw
837 }
838
839 /// # Safety
840 ///
841 /// - The caller must not free the `vk::DeviceMemory` or
842 /// `gpu_alloc::MemoryBlock` in the returned `TextureMemory`.
843 pub unsafe fn memory(&self) -> &TextureMemory {
844 &self.memory
845 }
846}
847
848#[derive(Debug)]
849pub struct TextureView {
850 raw_texture: vk::Image,
851 raw: vk::ImageView,
852 _layers: NonZeroU32,
853 format: wgt::TextureFormat,
854 raw_format: vk::Format,
855 base_mip_level: u32,
856 dimension: wgt::TextureViewDimension,
857 texture_identity: ResourceIdentity<vk::Image>,
858 view_identity: ResourceIdentity<vk::ImageView>,
859}
860
861impl crate::DynTextureView for TextureView {}
862
863impl TextureView {
864 /// # Safety
865 ///
866 /// - The image view handle must not be manually destroyed
867 pub unsafe fn raw_handle(&self) -> vk::ImageView {
868 self.raw
869 }
870
871 /// Returns the raw texture view, along with its identity.
872 fn identified_raw_view(&self) -> IdentifiedTextureView {
873 IdentifiedTextureView {
874 raw: self.raw,
875 identity: self.view_identity,
876 }
877 }
878}
879
880#[derive(Debug)]
881pub struct Sampler {
882 raw: vk::Sampler,
883 create_info: vk::SamplerCreateInfo<'static>,
884}
885
886impl crate::DynSampler for Sampler {}
887
/// Describes one binding inside a particular BindGroupLayout / BindGroup.
///
/// A [`naga::back::spv::BindingInfo`] is built from this; the descriptor set
/// value for it comes from the index of the group.
#[derive(Debug, Clone, Copy)]
struct BindingInfo {
    binding: u32,
    binding_array_size: Option<NonZeroU32>,
}
896
897#[derive(Debug)]
898pub struct BindGroupLayout {
899 raw: vk::DescriptorSetLayout,
900 desc_count: descriptor::DescriptorCounts,
901 /// Sorted list of entries.
902 entries: Box<[wgt::BindGroupLayoutEntry]>,
903 /// Map of original binding index to remapped binding index and optional
904 /// array size.
905 binding_map: Vec<(u32, BindingInfo)>,
906 contains_binding_arrays: bool,
907}
908
909impl crate::DynBindGroupLayout for BindGroupLayout {}
910
911#[derive(Debug)]
912pub struct PipelineLayout {
913 raw: vk::PipelineLayout,
914 binding_map: naga::back::spv::BindingMap,
915}
916
917impl crate::DynPipelineLayout for PipelineLayout {}
918
919#[derive(Debug)]
920pub struct BindGroup {
921 set: descriptor::DescriptorSet,
922}
923
924impl crate::DynBindGroup for BindGroup {}
925
926/// Miscellaneous allocation recycling pool for `CommandAllocator`.
927#[derive(Default)]
928struct Temp {
929 marker: Vec<u8>,
930 buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
931 image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
932}
933
934impl Temp {
935 fn clear(&mut self) {
936 self.marker.clear();
937 self.buffer_barriers.clear();
938 self.image_barriers.clear();
939 }
940
941 fn make_c_str(&mut self, name: &str) -> &CStr {
942 self.marker.clear();
943 self.marker.extend_from_slice(name.as_bytes());
944 self.marker.push(0);
945 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
946 }
947}
948
/// Hands out a unique ID for every resource of type `T`.
///
/// Vulkan handles are not permanently unique, so this type exists to give each
/// resource an ID that is.
struct ResourceIdentityFactory<T> {
    /// The next ID to hand out; a mutex-protected counter on targets without
    /// 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// The next ID to hand out; an atomic counter where 64-bit atomics exist.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the factory to the resource type without storing a `T`.
    _phantom: PhantomData<T>,
}
960
961impl<T> ResourceIdentityFactory<T> {
962 fn new() -> Self {
963 Self {
964 #[cfg(not(target_has_atomic = "64"))]
965 next_id: Mutex::new(0),
966 #[cfg(target_has_atomic = "64")]
967 next_id: core::sync::atomic::AtomicU64::new(0),
968 _phantom: PhantomData,
969 }
970 }
971
972 /// Returns a new unique ID for a resource of type `T`.
973 fn next(&self) -> ResourceIdentity<T> {
974 #[cfg(not(target_has_atomic = "64"))]
975 {
976 let mut next_id = self.next_id.lock();
977 let id = *next_id;
978 *next_id += 1;
979 ResourceIdentity {
980 id,
981 _phantom: PhantomData,
982 }
983 }
984
985 #[cfg(target_has_atomic = "64")]
986 ResourceIdentity {
987 id: self
988 .next_id
989 .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
990 _phantom: PhantomData,
991 }
992 }
993}
994
/// The unique identifier of one resource of type `T`.
///
/// It serves as a hashable key for resources and stays unique for the whole
/// lifetime of the program.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// The numeric ID.
    id: u64,
    /// Ties the ID to the resource type without storing a `T`.
    _phantom: PhantomData<T>,
}
1004
1005#[derive(Clone, Eq, Hash, PartialEq)]
1006struct FramebufferKey {
1007 raw_pass: vk::RenderPass,
1008 /// Because this is used as a key in a hash map, we need to include the identity
1009 /// so that this hashes differently, even if the ImageView handles are the same
1010 /// between different views.
1011 attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
1012 /// While this is redundant for calculating the hash, we need access to an array
1013 /// of all the raw ImageViews when we are creating the actual framebuffer,
1014 /// so we store this here.
1015 attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
1016 extent: wgt::Extent3d,
1017}
1018
1019impl FramebufferKey {
1020 fn push_view(&mut self, view: IdentifiedTextureView) {
1021 self.attachment_identities.push(view.identity);
1022 self.attachment_views.push(view.raw);
1023 }
1024}
1025
1026/// A texture view paired with its identity.
1027#[derive(Copy, Clone)]
1028struct IdentifiedTextureView {
1029 raw: vk::ImageView,
1030 identity: ResourceIdentity<vk::ImageView>,
1031}
1032
1033#[derive(Clone, Eq, Hash, PartialEq)]
1034struct TempTextureViewKey {
1035 texture: vk::Image,
1036 /// As this is used in a hashmap, we need to
1037 /// include the identity so that this hashes differently,
1038 /// even if the Image handles are the same between different images.
1039 texture_identity: ResourceIdentity<vk::Image>,
1040 format: vk::Format,
1041 mip_level: u32,
1042 depth_slice: u32,
1043}
1044
1045// Any state in this struct that may be dirty after an abandoned encoding must
1046// be reset for reused encoders in `begin_encoding`.
1047pub struct CommandEncoder {
1048 raw: vk::CommandPool,
1049 device: Arc<DeviceShared>,
1050
1051 /// The current command buffer, if `self` is in the ["recording"]
1052 /// state.
1053 ///
1054 /// ["recording"]: crate::CommandEncoder
1055 ///
1056 /// If non-`null`, the buffer is in the Vulkan "recording" state.
1057 active: vk::CommandBuffer,
1058
1059 /// What kind of pass we are currently within: compute or render.
1060 bind_point: vk::PipelineBindPoint,
1061
1062 /// Allocation recycling pool for this encoder.
1063 temp: Temp,
1064
1065 /// A pool of available command buffers.
1066 ///
1067 /// These are all in the Vulkan "initial" state.
1068 free: Vec<vk::CommandBuffer>,
1069
1070 /// A pool of discarded command buffers.
1071 ///
1072 /// These could be in any Vulkan state except "pending".
1073 discarded: Vec<vk::CommandBuffer>,
1074
1075 /// If this is true, the active renderpass enabled a debug span,
1076 /// and needs to be disabled on renderpass close.
1077 rpass_debug_marker_active: bool,
1078
1079 /// If set, the end of the next render/compute pass will write a timestamp at
1080 /// the given pool & location.
1081 end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
1082
1083 framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
1084 temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,
1085
1086 counters: Arc<wgt::HalCounters>,
1087
1088 current_pipeline_is_multiview: bool,
1089}
1090
1091impl Drop for CommandEncoder {
1092 fn drop(&mut self) {
1093 // SAFETY:
1094 //
1095 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1096 // `CommandBuffer` must live until its execution is complete, and that a
1097 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1098 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1099 // state.
1100 //
1101 // The other VUIDs are pretty obvious.
1102 unsafe {
1103 // `vkDestroyCommandPool` also frees any command buffers allocated
1104 // from that pool, so there's no need to explicitly call
1105 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1106 // fields.
1107 self.device.raw.destroy_command_pool(self.raw, None);
1108 }
1109
1110 for (_, fb) in self.framebuffers.drain() {
1111 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1112 }
1113
1114 for (_, view) in self.temp_texture_views.drain() {
1115 unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1116 }
1117
1118 self.counters.command_encoders.sub(1);
1119 }
1120}
1121
1122impl CommandEncoder {
1123 /// # Safety
1124 ///
1125 /// - The command buffer handle must not be manually destroyed
1126 pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
1127 self.active
1128 }
1129}
1130
1131impl fmt::Debug for CommandEncoder {
1132 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1133 f.debug_struct("CommandEncoder")
1134 .field("raw", &self.raw)
1135 .finish()
1136 }
1137}
1138
1139#[derive(Debug)]
1140pub struct CommandBuffer {
1141 raw: vk::CommandBuffer,
1142}
1143
1144impl crate::DynCommandBuffer for CommandBuffer {}
1145
1146#[derive(Debug)]
1147pub enum ShaderModule {
1148 Raw(vk::ShaderModule),
1149 Intermediate {
1150 naga_shader: crate::NagaShader,
1151 runtime_checks: wgt::ShaderRuntimeChecks,
1152 },
1153}
1154
1155impl crate::DynShaderModule for ShaderModule {}
1156
1157#[derive(Debug)]
1158pub struct RenderPipeline {
1159 raw: vk::Pipeline,
1160 is_multiview: bool,
1161}
1162
1163impl crate::DynRenderPipeline for RenderPipeline {}
1164
1165#[derive(Debug)]
1166pub struct ComputePipeline {
1167 raw: vk::Pipeline,
1168}
1169
1170impl crate::DynComputePipeline for ComputePipeline {}
1171
1172#[derive(Debug)]
1173pub struct RayTracingPipeline {
1174 raw: vk::Pipeline,
1175}
1176
1177impl crate::DynRayTracingPipeline for RayTracingPipeline {}
1178
1179#[derive(Debug)]
1180pub struct PipelineCache {
1181 raw: vk::PipelineCache,
1182}
1183
1184impl crate::DynPipelineCache for PipelineCache {}
1185
1186#[derive(Debug)]
1187pub struct QuerySet {
1188 raw: vk::QueryPool,
1189}
1190
1191impl crate::DynQuerySet for QuerySet {}
1192
1193/// The [`Api::Fence`] type for [`vulkan::Api`].
1194///
1195/// This is an `enum` because there are two possible implementations of
1196/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
1197/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
1198/// require non-1.0 features.
1199///
1200/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
1201/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
1202/// otherwise.
1203///
1204/// [`Api::Fence`]: crate::Api::Fence
1205/// [`vulkan::Api`]: Api
1206/// [`Device::create_fence`]: crate::Device::create_fence
1207/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
1208/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
1209/// [`FencePool`]: Fence::FencePool
1210#[derive(Debug)]
1211pub enum Fence {
1212 /// A Vulkan [timeline semaphore].
1213 ///
1214 /// These are simpler to use than Vulkan fences, since timeline semaphores
1215 /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work.
1216 ///
1217 /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
1218 /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence
1219 TimelineSemaphore(vk::Semaphore),
1220
1221 /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
1222 ///
1223 /// The effective [`FenceValue`] of this variant is the greater of
1224 /// `last_completed` and the maximum value associated with a signalled fence
1225 /// in `active`.
1226 ///
1227 /// Fences are available in all versions of Vulkan, but since they only have
1228 /// two states, "signaled" and "unsignaled", we need to use a separate fence
1229 /// for each queue submission we might want to wait for, and remember which
1230 /// [`FenceValue`] each one represents.
1231 ///
1232 /// One should keep the fence pool read while there are any references to the
1233 /// fences inside of them. This ensures there are no race conditions when
1234 /// resetting the fences
1235 ///
1236 /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
1237 /// [`FenceValue`]: crate::FenceValue
1238 FencePool(RwLock<FencePool>),
1239}
1240
1241/// A shared fence type. The arc is expect to have a ref-count of one once a function has finished being called
1242///
1243/// A fence should have access synchronised as fence resetting might happen at any point. Resetting checks the ref-count
1244/// of the fence, so instead of copying the fence, it should have its `Arc` container cloned which shows not to reset
1245/// this fence as it is being used.
1246pub(super) type SynchronizedFence = Arc<vk::Fence>;
1247
1248#[derive(Debug)]
1249pub struct FencePool {
1250 last_completed: crate::FenceValue,
1251 /// The pending fence values have to be ascending.
1252 active: Vec<(crate::FenceValue, SynchronizedFence)>,
1253 // Don't need extra synchronisation around the fences here, if they are used they should be put into active.
1254 free: Vec<vk::Fence>,
1255}
1256
1257impl crate::DynFence for Fence {}
1258
1259impl Fence {
1260 /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1261 ///
1262 /// As an optimization, assume that we already know that the fence has
1263 /// reached `last_completed`, and don't bother checking fences whose values
1264 /// are less than that: those fences remain in the `active` array only
1265 /// because we haven't called `maintain` yet to clean them up.
1266 ///
1267 /// [`FenceValue`]: crate::FenceValue
1268 fn check_active(
1269 device: &ash::Device,
1270 mut last_completed: crate::FenceValue,
1271 active: &[(crate::FenceValue, SynchronizedFence)],
1272 ) -> Result<crate::FenceValue, crate::DeviceError> {
1273 for &(value, ref raw) in active.iter() {
1274 unsafe {
1275 if value > last_completed
1276 && device
1277 // Don't need to clone as active should be from a read or
1278 // write lock which means this is already synchronised.
1279 .get_fence_status(**raw)
1280 .map_err(map_host_device_oom_and_lost_err)?
1281 {
1282 last_completed = value;
1283 }
1284 }
1285 }
1286 Ok(last_completed)
1287 }
1288
1289 /// Return the highest signalled [`FenceValue`] for `self`.
1290 ///
1291 /// [`FenceValue`]: crate::FenceValue
1292 fn get_latest(
1293 &self,
1294 device: &ash::Device,
1295 extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1296 ) -> Result<crate::FenceValue, crate::DeviceError> {
1297 match *self {
1298 Self::TimelineSemaphore(raw) => unsafe {
1299 Ok(match *extension.unwrap() {
1300 ExtensionFn::Extension(ref ext) => ext
1301 .get_semaphore_counter_value(raw)
1302 .map_err(map_host_device_oom_and_lost_err)?,
1303 ExtensionFn::Promoted => device
1304 .get_semaphore_counter_value(raw)
1305 .map_err(map_host_device_oom_and_lost_err)?,
1306 })
1307 },
1308 Self::FencePool(ref pool) => {
1309 let FencePool {
1310 last_completed,
1311 ref active,
1312 free: _,
1313 } = *pool.read();
1314 Self::check_active(device, last_completed, active)
1315 }
1316 }
1317 }
1318
1319 /// Trim the internal state of this [`Fence`].
1320 ///
1321 /// This function has no externally visible effect, but you should call it
1322 /// periodically to keep this fence's resource consumption under control.
1323 ///
1324 /// For fences using the [`FencePool`] implementation, this function
1325 /// recycles fences that have been signaled. If you don't call this,
1326 /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1327 /// time it's called.
1328 ///
1329 /// [`FencePool`]: Fence::FencePool
1330 /// [`Queue::submit`]: crate::Queue::submit
1331 fn maintain(&self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1332 match *self {
1333 Self::TimelineSemaphore(_) => {}
1334 Self::FencePool(ref pool) => {
1335 let FencePool {
1336 ref mut last_completed,
1337 ref mut active,
1338 ref mut free,
1339 } = *pool.write();
1340
1341 let base_free = free.len();
1342 let latest = Self::check_active(device, *last_completed, active)?;
1343
1344 active.retain_mut(|&mut (value, ref mut fence)| {
1345 if value > latest {
1346 true
1347 } else if let Some(fence) = Arc::get_mut(fence) {
1348 // No other references to these, so we have exclusive access. Add them to free and reset them later,
1349 // but drop them from active immediately
1350 free.push(*fence);
1351 false
1352 } else {
1353 // some other function is using it. Although this shouldn't be to long,
1354 // maintain shouldn't block, and it should be cleared up by the next time it happens
1355 true
1356 }
1357 });
1358
1359 if free.len() != base_free {
1360 unsafe { device.reset_fences(&free[base_free..]) }
1361 .map_err(map_device_oom_err)?
1362 }
1363 *last_completed = latest;
1364 }
1365 }
1366 Ok(())
1367 }
1368}
1369
1370impl crate::Queue for Queue {
1371 type A = Api;
1372
1373 unsafe fn submit(
1374 &self,
1375 command_buffers: &[&CommandBuffer],
1376 surface_textures: &[&SurfaceTexture],
1377 (signal_fence, signal_value): (&Fence, crate::FenceValue),
1378 ) -> Result<(), crate::DeviceError> {
1379 let mut fence_raw = vk::Fence::null();
1380
1381 let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
1382 let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);
1383
1384 // Double check that the same swapchain image isn't being given to us multiple times,
1385 // as that will deadlock when we try to lock them all.
1386 debug_assert!(
1387 {
1388 let mut check = HashSet::with_capacity(surface_textures.len());
1389 // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
1390 for st in surface_textures {
1391 let ptr: *const () = <*const _>::cast(&*st.metadata);
1392 check.insert(ptr as usize);
1393 }
1394 check.len() == surface_textures.len()
1395 },
1396 "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
1397 );
1398
1399 let locked_swapchain_semaphores = surface_textures
1400 .iter()
1401 .map(|st| st.metadata.get_semaphore_guard())
1402 .collect::<Vec<_>>();
1403
1404 for mut semaphores in locked_swapchain_semaphores {
1405 semaphores.set_used_fence_value(signal_value);
1406
1407 // If we're the first submission to operate on this image, wait on
1408 // its acquire semaphore, to make sure the presentation engine is
1409 // done with it.
1410 if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
1411 wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
1412 }
1413
1414 // Get a semaphore to signal when we're done writing to this surface
1415 // image. Presentation of this image will wait for this.
1416 let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
1417 signal_semaphores.push_signal(signal_semaphore);
1418 }
1419
1420 let mut guard = self.signal_semaphores.lock();
1421 if !guard.is_empty() {
1422 signal_semaphores.append(&mut guard);
1423 }
1424
1425 let mut wait_guard = self.wait_semaphores.lock();
1426 if !wait_guard.is_empty() {
1427 wait_semaphores.append(&mut wait_guard);
1428 }
1429
1430 // In order for submissions to be strictly ordered, we encode a dependency between each submission
1431 // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
1432 let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
1433
1434 if let Some(sem) = semaphore_state.wait {
1435 wait_semaphores.push_wait(
1436 SemaphoreType::Binary(sem),
1437 vk::PipelineStageFlags::TOP_OF_PIPE,
1438 );
1439 }
1440
1441 signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));
1442
1443 // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
1444 signal_fence.maintain(&self.device.raw)?;
1445 // Keeping the Arc around is probably unneeded - the fence should never be signaled as it was reset,
1446 // and newer submits should not happen until this submit is done. Therefore, it should be too high
1447 // to be reset.
1448 let shared_fence;
1449 match *signal_fence {
1450 Fence::TimelineSemaphore(raw) => {
1451 signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
1452 }
1453 Fence::FencePool(ref pool) => {
1454 let FencePool {
1455 ref mut active,
1456 ref mut free,
1457 ..
1458 } = *pool.write();
1459 shared_fence = match free.pop() {
1460 Some(raw) => Arc::new(raw),
1461 None => unsafe {
1462 let fence = self
1463 .device
1464 .raw
1465 .create_fence(&vk::FenceCreateInfo::default(), None)
1466 .map_err(map_host_device_oom_err)?;
1467 Arc::new(fence)
1468 },
1469 };
1470 fence_raw = *shared_fence;
1471 active.push((signal_value, shared_fence.clone()));
1472 }
1473 }
1474
1475 let vk_cmd_buffers = command_buffers
1476 .iter()
1477 .map(|cmd| cmd.raw)
1478 .collect::<Vec<_>>();
1479
1480 let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
1481 let mut vk_timeline_info = mem::MaybeUninit::uninit();
1482 vk_info = SemaphoreList::add_to_submit(
1483 &mut wait_semaphores,
1484 &mut signal_semaphores,
1485 vk_info,
1486 &mut vk_timeline_info,
1487 );
1488
1489 profiling::scope!("vkQueueSubmit");
1490 unsafe {
1491 self.device
1492 .raw
1493 .queue_submit(self.raw, &[vk_info], fence_raw)
1494 .map_err(map_host_device_oom_and_lost_err)?
1495 };
1496 Ok(())
1497 }
1498
1499 unsafe fn present(
1500 &self,
1501 surface: &Surface,
1502 texture: SurfaceTexture,
1503 ) -> Result<(), crate::SurfaceError> {
1504 let mut swapchain = surface.swapchain.write();
1505
1506 unsafe { swapchain.as_mut().unwrap().present(self, texture) }
1507 }
1508
1509 unsafe fn get_timestamp_period(&self) -> f32 {
1510 self.device.timestamp_period
1511 }
1512
1513 unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
1514 unsafe { self.device.raw.queue_wait_idle(self.raw) }
1515 .map_err(map_host_device_oom_and_lost_err)
1516 }
1517}
1518
1519impl Queue {
1520 pub fn raw_device(&self) -> &ash::Device {
1521 &self.device.raw
1522 }
1523
1524 pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1525 let mut guard = self.signal_semaphores.lock();
1526 if let Some(value) = semaphore_value {
1527 guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1528 } else {
1529 guard.push_signal(SemaphoreType::Binary(semaphore));
1530 }
1531 }
1532
1533 /// Remove `semaphore` from the pending signal list if it is still present.
1534 ///
1535 /// Returns `true` if the semaphore was found and removed. If the submit
1536 /// already consumed it, this is a harmless no-op that returns `false`.
1537 pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1538 self.signal_semaphores.lock().remove(semaphore)
1539 }
1540
1541 /// Stage a semaphore wait on the next [`crate::Queue::submit`] call.
1542 ///
1543 /// `semaphore_value` selects the kind of payload the wait targets:
1544 ///
1545 /// - `Some(value)` - wait until `semaphore` (a timeline semaphore) has been signalled to at least `value`.
1546 /// - `None` - wait on a binary semaphore signal.
1547 ///
1548 /// `stage` is the pipeline stage at which the wait blocks downstream
1549 /// work (e.g. `vk::PipelineStageFlags::TOP_OF_PIPE` to gate the
1550 /// entire submission, or a more specific stage when only that stage
1551 /// reads the synchronised resource).
1552 pub fn add_wait_semaphore(
1553 &self,
1554 semaphore: vk::Semaphore,
1555 semaphore_value: Option<u64>,
1556 stage: vk::PipelineStageFlags,
1557 ) {
1558 let mut guard = self.wait_semaphores.lock();
1559 if let Some(value) = semaphore_value {
1560 guard.push_wait(SemaphoreType::Timeline(semaphore, value), stage);
1561 } else {
1562 guard.push_wait(SemaphoreType::Binary(semaphore), stage);
1563 }
1564 }
1565
1566 /// Remove `semaphore` from the pending wait list if it is still present.
1567 ///
1568 /// Returns `true` if the semaphore was found and removed. If the submit
1569 /// already consumed it, this is a no-op that returns `false`.
1570 pub fn remove_wait_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1571 self.wait_semaphores.lock().remove(semaphore)
1572 }
1573}
1574
1575/// Maps
1576///
1577/// - VK_ERROR_OUT_OF_HOST_MEMORY
1578/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1579fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1580 match err {
1581 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1582 get_oom_err(err)
1583 }
1584 e => get_unexpected_err(e),
1585 }
1586}
1587
1588/// Maps
1589///
1590/// - VK_ERROR_OUT_OF_HOST_MEMORY
1591/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1592/// - VK_ERROR_DEVICE_LOST
1593fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1594 match err {
1595 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1596 other => map_host_device_oom_err(other),
1597 }
1598}
1599
1600/// Maps
1601///
1602/// - VK_ERROR_OUT_OF_HOST_MEMORY
1603/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1604/// - VK_ERROR_FRAGMENTATION
1605fn map_host_device_oom_and_fragmentation_err(err: vk::Result) -> crate::DeviceError {
1606 match err {
1607 vk::Result::ERROR_FRAGMENTATION => get_oom_err(err),
1608 other => map_host_device_oom_err(other),
1609 }
1610}
1611
1612/// Maps
1613///
1614/// - VK_ERROR_OUT_OF_HOST_MEMORY
1615/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1616/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1617fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1618 // We don't use VK_KHR_buffer_device_address
1619 // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1620 map_host_device_oom_err(err)
1621}
1622
1623/// Maps
1624///
1625/// - VK_ERROR_OUT_OF_HOST_MEMORY
1626fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1627 match err {
1628 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1629 e => get_unexpected_err(e),
1630 }
1631}
1632
1633/// Maps
1634///
1635/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1636fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1637 match err {
1638 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1639 e => get_unexpected_err(e),
1640 }
1641}
1642
1643/// Maps
1644///
1645/// - VK_ERROR_OUT_OF_HOST_MEMORY
1646/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1647fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
1648 // We don't use VK_KHR_buffer_device_address
1649 // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
1650 map_host_oom_err(err)
1651}
1652
1653/// Maps
1654///
1655/// - VK_ERROR_OUT_OF_HOST_MEMORY
1656/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1657/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
1658/// - VK_ERROR_INVALID_SHADER_NV
1659fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
1660 // We don't use VK_EXT_pipeline_creation_cache_control
1661 // VK_PIPELINE_COMPILE_REQUIRED_EXT
1662 // We don't use VK_NV_glsl_shader
1663 // VK_ERROR_INVALID_SHADER_NV
1664 map_host_device_oom_err(err)
1665}
1666
1667/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1668/// feature flag is enabled.
1669fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1670 #[cfg(feature = "internal_error_panic")]
1671 panic!("Unexpected Vulkan error: {_err:?}");
1672
1673 #[allow(unreachable_code)]
1674 crate::DeviceError::Unexpected
1675}
1676
1677/// Returns [`crate::DeviceError::OutOfMemory`].
1678fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
1679 crate::DeviceError::OutOfMemory
1680}
1681
1682/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1683/// feature flag is enabled.
1684fn get_lost_err() -> crate::DeviceError {
1685 #[cfg(feature = "device_lost_panic")]
1686 panic!("Device lost");
1687
1688 #[allow(unreachable_code)]
1689 crate::DeviceError::Lost
1690}
1691
1692#[derive(Clone, Copy, Pod, Zeroable)]
1693#[repr(C)]
1694struct RawTlasInstance {
1695 transform: [f32; 12],
1696 custom_data_and_mask: u32,
1697 shader_binding_table_record_offset_and_flags: u32,
1698 acceleration_structure_reference: u64,
1699}
1700
1701/// Arguments to the [`CreateDeviceCallback`].
1702#[derive(Debug)]
1703pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
1704where
1705 'this: 'pnext,
1706{
1707 /// The extensions to enable for the device. You must not remove anything from this list,
1708 /// but you may add to it.
1709 pub extensions: &'arg mut Vec<&'static CStr>,
1710 /// The physical device features to enable. You may enable features, but must not disable any.
1711 pub device_features: &'arg mut PhysicalDeviceFeatures,
1712 /// The queue create infos for the device. You may add or modify queue create infos as needed.
1713 pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
1714 /// The create info for the device. You may add or modify things in the pnext chain, but
1715 /// do not turn features off. Additionally, do not add things to the list of extensions,
1716 /// or to the feature set, as all changes to that member will be overwritten.
1717 pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
1718 /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1719 /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1720 /// don't actually directly use `'this`
1721 _phantom: PhantomData<&'this ()>,
1722}
1723
1724/// Callback to allow changing the vulkan device creation parameters.
1725///
1726/// # Safety:
1727/// - If you want to add extensions, add the to the `Vec<'static CStr>` not the create info,
1728/// as the create info value will be overwritten.
1729/// - Callback must not remove features.
1730/// - Callback must not change anything to what the instance does not support.
1731pub type CreateDeviceCallback<'this> =
1732 dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1733
1734/// Arguments to the [`CreateInstanceCallback`].
1735#[expect(missing_debug_implementations, reason = "TODO?")]
1736pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
1737where
1738 'this: 'pnext,
1739{
1740 /// The extensions to enable for the instance. You must not remove anything from this list,
1741 /// but you may add to it.
1742 pub extensions: &'arg mut Vec<&'static CStr>,
1743 /// The create info for the instance. You may add or modify things in the pnext chain, but
1744 /// do not turn features off. Additionally, do not add things to the list of extensions,
1745 /// all changes to that member will be overwritten.
1746 pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
1747 /// Vulkan entry point.
1748 pub entry: &'arg ash::Entry,
1749 /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1750 /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1751 /// don't actually directly use `'this`
1752 _phantom: PhantomData<&'this ()>,
1753}
1754
1755/// Callback to allow changing the vulkan instance creation parameters.
1756///
1757/// # Safety:
1758/// - If you want to add extensions, add the to the `Vec<'static CStr>` not the create info,
1759/// as the create info value will be overwritten.
1760/// - Callback must not remove features.
1761/// - Callback must not change anything to what the instance does not support.
1762pub type CreateInstanceCallback<'this> =
1763 dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;