wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in the following ways:
 - temporarily allocating `Vec` on heap, where overhead is permitted
 - growing temporary local storage

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they contain is destroyed.
If Vulkan supports image-less framebuffers,
the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
pub mod conv;
mod device;
mod drm;
mod instance;
mod sampler;
mod semaphore_list;
mod swapchain;

pub use adapter::PhysicalDeviceFeatures;

use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
use core::{
    borrow::Borrow,
    ffi::CStr,
    fmt,
    marker::PhantomData,
    mem::{self, ManuallyDrop},
    num::NonZeroU32,
};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use bytemuck::{Pod, Zeroable};
use hashbrown::HashSet;
use parking_lot::{Mutex, RwLock};

use naga::FastHashMap;
use wgt::InternalCounter;

use semaphore_list::SemaphoreList;

use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};

const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// The properties of the validation layer that the `DebugUtilsMessenger`
/// needs in order to apply its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments its layer's
    /// version, so there is no reason to store it.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// That is, the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

pub struct Surface {
    inner: ManuallyDrop<Box<dyn swapchain::Surface>>,
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
}

impl Surface {
    /// Returns the raw Vulkan surface handle.
    ///
    /// Returns `None` if the surface is a DXGI surface.
    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
        Some(
            self.inner
                .as_any()
                .downcast_ref::<swapchain::NativeSurface>()?
                .as_raw(),
        )
    }

    /// Get the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured or if the swapchain
    /// is a DXGI swapchain.
    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        Some(
            read.as_ref()?
                .as_any()
                .downcast_ref::<swapchain::NativeSwapchain>()?
                .as_raw(),
        )
    }

    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
    ///
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the surface has been configured for a DXGI swapchain.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
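    ///
    /// # Examples
    ///
    /// A minimal sketch (assuming `surface` is a configured `Surface` on a
    /// device with `VULKAN_GOOGLE_DISPLAY_TIMING` enabled; the `present_id`
    /// value here is arbitrary):
    ///
    /// ```ignore
    /// surface.set_next_present_time(ash::vk::PresentTimeGOOGLE {
    ///     present_id: 1,
    ///     desired_present_time: 0, // 0 means "no earliest-time constraint"
    /// });
    /// ```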
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        swapchain
            .as_mut()
            .expect("Surface should have been configured")
            .as_any_mut()
            .downcast_mut::<swapchain::NativeSwapchain>()
            .expect("Surface should have a native Vulkan swapchain")
            .set_next_present_time(present_timing);
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified; the function pointers should all be the same, but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    multi_draw_indirect: bool,
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// This exists so we can panic before hitting undefined behavior, and is
    /// imperfect. To allow implementations to emulate multiview using
    /// instancing, drawing `n` instances to two views requires drawing `2n`
    /// instances internally, but no more than `u32::MAX` instances can ever
    /// be drawn. Some Vulkan implementations therefore restrict the instance
    /// count when drawing with multiview, which isn't usually a thing in
    /// WebGPU. We don't expose this limit because it's strange: it only
    /// applies on certain Vulkan implementations when drawing more than
    /// roughly 128 million instances. We still want to avoid undefined
    /// behavior in that situation, so we panic if the limit is violated.
    multiview_instance_index_limit: u32,

    /// `BufferUsages::ACCELERATION_STRUCTURE_SCRATCH` allows usage as a scratch buffer.
    /// Vulkan has no way to specify this as a usage, so it maps to other usages, but
    /// those usages do not carry the higher alignment requirement that a buffer needs
    /// when used as a scratch buffer while building acceleration structures.
    scratch_buffer_alignment: u32,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview_mask: Option<NonZeroU32>,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we cache framebuffers and don't delete them until the device
    /// is destroyed, and because a Vulkan implementation may reuse handles,
    /// we need some way to tell an old handle apart from a new one.
    /// This factory lets us give each texture a dedicated identity value.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last. Keep the `DeviceShared`
    // last so that everything depending on it, like `mem_allocator`, is
    // destroyed first.
    shared: Arc<DeviceShared>,
}

impl Drop for Device {
    fn drop(&mut self) {
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}
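
// An illustrative trace of `RelaySemaphores::advance` (not part of the
// original code; the semaphore names `s1`/`s2` are hypothetical). Each call
// returns the state the *current* submission should use:
//
//     submission 1: wait on None, signal s1
//     submission 2: wait on s1,   signal s2
//     submission 3: wait on s2,   signal s1
//     submission 4: wait on s1,   signal s2
//     ...
//
// After the first call, the two binary semaphores simply alternate roles, so
// each always has exactly one pending signal and one pending wait.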

pub struct Queue {
    raw: vk::Queue,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}

#[derive(Debug)]
enum BufferMemoryBacking {
    Managed(gpu_allocator::vulkan::Allocation),
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}

impl BufferMemoryBacking {
    fn memory(&self) -> vk::DeviceMemory {
        match self {
            Self::Managed(m) => unsafe { m.memory() },
            Self::VulkanMemory { memory, .. } => *memory,
        }
    }

    fn offset(&self) -> u64 {
        match self {
            Self::Managed(m) => m.offset(),
            Self::VulkanMemory { offset, .. } => *offset,
        }
    }

    fn size(&self) -> u64 {
        match self {
            Self::Managed(m) => m.size(),
            Self::VulkanMemory { size, .. } => *size,
        }
    }
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    allocation: Option<Mutex<BufferMemoryBacking>>,
}

impl Buffer {
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            allocation: None,
        }
    }

    /// # Safety
    ///
    /// - We will use this buffer and the buffer's backing memory range as if
    ///   we have exclusive ownership of it, until the wgpu resource is dropped
    ///   and the wgpu-hal object is cleaned up.
    /// - Externally imported buffers can't be mapped by `wgpu`.
    /// - `offset` and `size` must lie within the allocation of `memory`.
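    ///
    /// # Examples
    ///
    /// A hedged sketch; `raw_buffer`, `raw_memory`, `offset`, and `size` are
    /// placeholders for handles produced by the caller's own Vulkan
    /// allocation code:
    ///
    /// ```ignore
    /// let hal_buffer = unsafe {
    ///     wgpu_hal::vulkan::Buffer::from_raw_managed(raw_buffer, raw_memory, offset, size)
    /// };
    /// ```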
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            allocation: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    allocation: gpu_allocator::vulkan::Allocation,
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub enum TextureMemory {
    // shared memory in GPU allocator (owned by wgpu-hal)
    Allocation(gpu_allocator::vulkan::Allocation),

    // dedicated memory (owned by wgpu-hal)
    Dedicated(vk::DeviceMemory),

    // memory not owned by wgpu
    External,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or the
    ///   `gpu_allocator` allocation in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw_texture: vk::Image,
    raw: vk::ImageView,
    _layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    texture_identity: ResourceIdentity<vk::Image>,
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    binding: u32,
    binding_array_size: Option<NonZeroU32>,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
    }

    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
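        // SAFETY: `marker` now holds `name`'s bytes followed by exactly one
        // trailing NUL. (Note this relies on marker names containing no
        // interior NUL bytes, which `from_bytes_with_nul_unchecked` requires.)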
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

/// Generates unique IDs for each resource of type `T`.
///
/// Because Vulkan handles are not permanently unique, this provides
/// a way to give each resource an ID that is.
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}
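
// Usage sketch (illustrative only): the device keeps one factory per handle
// type and tags each new resource with a fresh identity, so two resources
// compare unequal even if Vulkan hands back the same raw handle value.
//
//     let factory: ResourceIdentityFactory<vk::Image> = ResourceIdentityFactory::new();
//     let first = factory.next();
//     let second = factory.next();
//     assert_ne!(first, second);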

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources; unlike a raw handle, it
/// stays unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}

/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If true, the active render pass enabled a debug span,
    /// which needs to be disabled on render pass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,

    current_pipeline_is_multiview: bool,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
    is_multiview: bool,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}
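
// An illustrative `FencePool` walkthrough (not from the original source):
// with `last_completed = 2` and `active = [(3, f3), (4, f4)]`, once the GPU
// signals `f3` and `f4`, `get_latest` reports 4. A later `maintain` call then
// moves `f3` and `f4` into `free` (after resetting them) and bumps
// `last_completed` to 4.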

impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl Queue {
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

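    /// Adds a semaphore to be signalled by the next [`submit`](crate::Queue::submit)
    /// call on this queue.
    ///
    /// Pass `Some(value)` to signal a timeline semaphore with that value, or
    /// `None` to signal a binary semaphore.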
    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
        } else {
            guard.push_signal(SemaphoreType::Binary(semaphore));
        }
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
            get_oom_err(err)
        }
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_DEVICE_LOST
fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}

/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}

#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}

/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety
///
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
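
// A usage sketch (illustrative; `khr::external_memory_fd` stands in for
// whatever extension the caller actually needs):
//
//     let callback: Box<CreateDeviceCallback> = Box::new(|mut args| {
//         // Request an extra device extension; the `extensions` Vec is the
//         // supported place to do this, per the safety notes above.
//         args.extensions.push(khr::external_memory_fd::NAME);
//     });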

/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety
///
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;