wgpu_hal/vulkan/
mod.rs

/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in a combination of ways:
  - temporarily allocating a `Vec` on the heap, where the overhead is permitted
  - growing temporary local storage
  - using `inplace_it` on iterators

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
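
The fence-pool path works roughly like this (a simplified sketch with a
hypothetical `create_raw_fence` helper; the real logic lives in
`Fence::maintain` and `Queue::submit` below):

```ignore
// Signal value `v`: reuse a free `VkFence` (or create one) and submit with it.
let raw = free.pop().unwrap_or_else(|| create_raw_fence(device)); // hypothetical helper
active.push((v, raw));
// Later, `maintain` polls `active`, advances `last_completed`,
// resets the signaled fences, and returns them to `free`.
```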

!*/

mod adapter;
mod command;
mod conv;
mod device;
mod instance;

use std::{borrow::Borrow, ffi::CStr, fmt, num::NonZeroU32, sync::Arc};

use arrayvec::ArrayVec;
use ash::{
    extensions::{ext, khr},
    vk,
};
use parking_lot::Mutex;

const MILLIS_TO_NANOS: u64 = 1_000_000;
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

struct DebugUtils {
    extension: ext::DebugUtils,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// Validation layer description, from `vk::LayerProperties`.
    validation_layer_description: std::ffi::CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    validation_layer_spec_version: u32,

    /// Whether the OBS layer is present. OBS never increments the version
    /// of its layer, so there's no reason to track it.
    has_obs_layer: bool,
}
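
// How the pieces above fit together (a hedged sketch, not the crate's exact
// code; the real callback is `instance::debug_utils_messenger_callback`):
//
//     let callback_data = Box::new(DebugUtilsMessengerUserData { /* ... */ });
//     let info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
//         .pfn_user_callback(Some(instance::debug_utils_messenger_callback))
//         .user_data(&*callback_data as *const _ as *mut _);
//     let messenger = unsafe { extension.create_debug_utils_messenger(&info, None)? };
//
// `DebugUtils` then owns `callback_data`, so the pointer stays valid until
// the messenger is destroyed in `InstanceShared::drop`.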

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// This is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    functor: khr::Swapchain,
    device: Arc<DeviceShared>,
    fence: vk::Fence,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
}

pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::Surface,
    instance: Arc<InstanceShared>,
    swapchain: Option<Swapchain>,
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceCapabilities,
    //phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified; the function pointers
// should all be the same, but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the
    /// functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
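
// Dispatch sketch for `ExtensionFn` (illustrative only; `some_fn` is a
// placeholder): callers match on the variant and route to either the
// extension loader or the core device call, e.g.
//
//     match *ext_fn {
//         ExtensionFn::Extension(ref ext) => ext.some_fn(/* ... */),
//         ExtensionFn::Promoted => device.some_fn(/* ... */),
//     }
//
// `Fence::get_latest` below is a concrete instance of this pattern.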

struct DeviceExtensionFunctions {
    draw_indirect_count: Option<khr::DrawIndirectCount>,
    timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device capabilities, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height`
    /// or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport
    /// height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and
    /// `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    imageless_framebuffers: bool,
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around
    /// broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    robust_buffer_access: bool,
    robust_image_access: bool,
    robust_buffer_access2: bool,
    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then NVIDIA will end up filling the wrong range.
        ///
        /// ```ignore
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to `vkCmdFillBuffer` are aligned
        /// to 16 bytes if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
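
// Illustration of the `FORCE_FILL_BUFFER_...` constraint above (not crate
// code; the actual handling lives elsewhere in the backend): a fill that
// covers 4096 bytes or more must keep both its offset and size 16-byte
// aligned, i.e.
//
//     debug_assert!(size < 4096 || (offset % 16 == 0 && size % 16 == 0));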

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: crate::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: ash::vk::Queue,
    handle_is_owned: bool,
    instance: Arc<InstanceShared>,
    physical_device: ash::vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
    framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
}
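
// The `render_passes`/`framebuffers` maps implement the caching described in
// the module docs. A hedged sketch of the lookup pattern (the real logic
// lives in the device/command code; `create_render_pass_from_key` is a
// hypothetical helper):
//
//     let mut cache = shared.render_passes.lock();
//     let raw_pass = *cache
//         .entry(key.clone())
//         .or_insert_with(|| create_render_pass_from_key(&shared.raw, &key));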

pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
}

pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::Swapchain,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// We use a redundant chain of semaphores to pass the signal from
    /// submissions on to the final present, since this is required by the
    /// specification.
    /// It would be correct to use a single semaphore there, but
    /// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508).
    relay_semaphores: [vk::Semaphore; 2],
    relay_index: Option<usize>,
}
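
// Relay pattern in brief (see `Queue::submit` and `Queue::present` below):
// each submit waits on the previously signaled `relay_semaphores[i]` and
// signals `relay_semaphores[(i + 1) % 2]`; `present` waits on whichever
// semaphore `relay_index` says was signaled last. For example:
//
//     submit 0: signals relay[0]             (relay_index = Some(0))
//     submit 1: waits relay[0], signals relay[1]
//     present : waits relay[1]               (relay_index -> None)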

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: crate::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    attachment: FramebufferAttachment,
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to the size of the corresponding binding array.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier>,
    image_barriers: Vec<vk::ImageMemoryBarrier>,
}

// SAFETY: the barrier structs are only `!Send`/`!Sync` because of their raw
// `p_next` pointers, which we never populate.
unsafe impl Send for Temp {}
unsafe impl Sync for Temp {}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
        // See also https://github.com/NotIntMan/inplace_it/issues/8
    }

    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        // SAFETY: `marker` was just rebuilt as `name`'s bytes plus a
        // terminating NUL.
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}
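
// Usage sketch for `Temp::make_c_str` (illustrative; the debug-label call
// sites live in the command encoder code): the scratch buffer is reused, so
// building a label does not allocate per call once `marker` has grown.
//
//     let label: &CStr = temp.make_c_str("my render pass");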

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,
    active: vk::CommandBuffer,
    bind_point: vk::PipelineBindPoint,
    temp: Temp,
    free: Vec<vk::CommandBuffer>,
    discarded: Vec<vk::CommandBuffer>,
    /// If this is true, the active render pass enabled a debug span that
    /// needs to be closed when the render pass ends.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: bool,
    },
}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

#[derive(Debug)]
pub enum Fence {
    TimelineSemaphore(vk::Semaphore),
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl Fence {
    /// Returns the highest fence value in `active` that is confirmed to be
    /// signaled, starting from `max_value`.
    fn check_active(
        device: &ash::Device,
        mut max_value: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > max_value && device.get_fence_status(raw)? {
                    max_value = value;
                }
            }
        }
        Ok(max_value)
    }

    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::TimelineSemaphore>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?,
                    ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                // Move every fence whose value is now confirmed signaled
                // over to the free list.
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    // Only reset the fences we just reclaimed.
                    unsafe {
                        device.reset_fences(&free[base_free..])?;
                    }
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue<Api> for Queue {
    unsafe fn submit(
        &mut self,
        command_buffers: &[&CommandBuffer],
        signal_fence: Option<(&mut Fence, crate::FenceValue)>,
    ) -> Result<(), crate::DeviceError> {
        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers);

        let mut fence_raw = vk::Fence::null();
        let mut vk_timeline_info;
        let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()];
        let signal_values;

        if let Some((fence, value)) = signal_fence {
            fence.maintain(&self.device.raw)?;
            match *fence {
                Fence::TimelineSemaphore(raw) => {
                    // The first slot belongs to the binary relay semaphore;
                    // its value is ignored, so `!0` is just a placeholder.
                    signal_values = [!0, value];
                    signal_semaphores[1] = raw;
                    vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder()
                        .signal_semaphore_values(&signal_values);
                    vk_info = vk_info.push_next(&mut vk_timeline_info);
                }
                Fence::FencePool {
                    ref mut active,
                    ref mut free,
                    ..
                } => {
                    fence_raw = match free.pop() {
                        Some(raw) => raw,
                        None => unsafe {
                            self.device
                                .raw
                                .create_fence(&vk::FenceCreateInfo::builder(), None)?
                        },
                    };
                    active.push((value, fence_raw));
                }
            }
        }

        let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE];
        let sem_index = match self.relay_index {
            Some(old_index) => {
                vk_info = vk_info
                    .wait_semaphores(&self.relay_semaphores[old_index..old_index + 1])
                    .wait_dst_stage_mask(&wait_stage_mask);
                (old_index + 1) % self.relay_semaphores.len()
            }
            None => 0,
        };
        self.relay_index = Some(sem_index);
        signal_semaphores[0] = self.relay_semaphores[sem_index];

        let signal_count = if signal_semaphores[1] == vk::Semaphore::null() {
            1
        } else {
            2
        };
        vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info.build()], fence_raw)?
        };
        Ok(())
    }

    unsafe fn present(
        &mut self,
        surface: &mut Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let ssc = surface.swapchain.as_ref().unwrap();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let mut vk_info = vk::PresentInfoKHR::builder()
            .swapchains(&swapchains)
            .image_indices(&image_indices);

        if let Some(old_index) = self.relay_index.take() {
            vk_info = vk_info.wait_semaphores(&self.relay_semaphores[old_index..old_index + 1]);
        }

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    _ => crate::DeviceError::from(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns
            // `VK_SUBOPTIMAL_KHR` if not doing pre-rotation (i.e.
            // `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current
            // device orientation).
            // This is always the case when the device orientation is anything other
            // than the identity one, as we unconditionally use
            // `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl From<vk::Result> for crate::DeviceError {
    fn from(result: vk::Result) -> Self {
        match result {
            vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
                Self::OutOfMemory
            }
            vk::Result::ERROR_DEVICE_LOST => Self::Lost,
            _ => {
                log::warn!("Unrecognized device error {:?}", result);
                Self::Lost
            }
        }
    }
}