wgpu_core/device/mod.rs

1use crate::{
2    binding_model,
3    hal_api::HalApi,
4    hub::Hub,
5    id,
6    identity::{GlobalIdentityHandlerFactory, Input},
7    resource::{Buffer, BufferAccessResult},
8    resource::{BufferAccessError, BufferMapOperation},
9    Label, DOWNLEVEL_ERROR_MESSAGE,
10};
11
12use arrayvec::ArrayVec;
13use hal::Device as _;
14use smallvec::SmallVec;
15use thiserror::Error;
16use wgt::{BufferAddress, TextureFormat};
17
18use std::{iter, num::NonZeroU32, ptr};
19
// Submodules of the device layer.
pub mod global;
// Internal-only: device lifetime tracking (provides `WaitIdleError`).
mod life;
pub mod queue;
pub mod resource;
// Trace support is only compiled when capture ("trace") or replay is enabled.
#[cfg(any(feature = "trace", feature = "replay"))]
pub mod trace;
// Re-export the most commonly used items at the `device` module level.
pub use {life::WaitIdleError, resource::Device};
27
// Number of distinct shader stages tracked per pipeline.
// (Presumably vertex, fragment, compute — confirm against usage sites.)
pub const SHADER_STAGE_COUNT: usize = 3;
// Should be large enough for the largest possible texture row. This
// value is enough for a 16k texture with float4 format.
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;

// Timeout, in milliseconds, used when waiting for cleanup work to finish
// (name suggests GPU-idle waits during teardown — confirm at call sites).
const CLEANUP_WAIT_MS: u32 = 5000;

// Error labels attached to implicitly created pipeline resources and
// invalid entry points, respectively (exact usage is outside this view).
const IMPLICIT_FAILURE: &str = "failed implicit";
const EP_FAILURE: &str = "EP is invalid";

/// Device descriptor specialized to wgpu-core's borrowed `Label` type.
pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
39
/// Direction of a host (CPU) mapping of a buffer.
///
/// Used by `map_buffer` to decide cache maintenance: `Read` mappings of
/// non-coherent memory are invalidated immediately, while `Write` mappings
/// record a range to be flushed later.
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "trace", derive(serde::Serialize))]
#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
pub enum HostMap {
    /// The host intends to read data produced by the GPU.
    Read,
    /// The host intends to write data for the GPU to consume.
    Write,
}
48
/// Per-attachment data of a render pass, generic over the attachment
/// representation `T` (e.g. `TextureFormat` in `RenderPassContext`).
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct AttachmentData<T> {
    /// Color attachment slots; `None` marks an unused slot.
    pub colors: ArrayVec<Option<T>, { hal::MAX_COLOR_ATTACHMENTS }>,
    /// Resolve attachments (named after MSAA resolve targets — confirm).
    pub resolves: ArrayVec<T, { hal::MAX_COLOR_ATTACHMENTS }>,
    /// Optional depth/stencil attachment.
    pub depth_stencil: Option<T>,
}
// Manual `Eq` so that only `T: PartialEq` is required, rather than `T: Eq`.
impl<T: PartialEq> Eq for AttachmentData<T> {}
57impl<T> AttachmentData<T> {
58    pub(crate) fn map<U, F: Fn(&T) -> U>(&self, fun: F) -> AttachmentData<U> {
59        AttachmentData {
60            colors: self.colors.iter().map(|c| c.as_ref().map(&fun)).collect(),
61            resolves: self.resolves.iter().map(&fun).collect(),
62            depth_stencil: self.depth_stencil.as_ref().map(&fun),
63        }
64    }
65}
66
/// Which kind of object is being checked for compatibility with a render
/// pass; only used to word the resulting error message.
#[derive(Debug, Copy, Clone)]
pub enum RenderPassCompatibilityCheckType {
    RenderPipeline,
    RenderBundle,
}
72
/// The formats, sample count, and multiview configuration that define a
/// render pass's "shape" for compatibility checks (see `check_compatible`).
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct RenderPassContext {
    /// Texture formats of all attachments.
    pub attachments: AttachmentData<TextureFormat>,
    /// MSAA sample count shared by the attachments.
    pub sample_count: u32,
    /// Multiview layer count; `None` when multiview is not used.
    pub multiview: Option<NonZeroU32>,
}
80#[derive(Clone, Debug, Error)]
81#[non_exhaustive]
82pub enum RenderPassCompatibilityError {
83    #[error(
84        "Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {ty:?} uses attachments with formats {actual:?}",
85    )]
86    IncompatibleColorAttachment {
87        indices: Vec<usize>,
88        expected: Vec<Option<TextureFormat>>,
89        actual: Vec<Option<TextureFormat>>,
90        ty: RenderPassCompatibilityCheckType,
91    },
92    #[error(
93        "Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {ty:?} uses an attachment with format {actual:?}",
94    )]
95    IncompatibleDepthStencilAttachment {
96        expected: Option<TextureFormat>,
97        actual: Option<TextureFormat>,
98        ty: RenderPassCompatibilityCheckType,
99    },
100    #[error(
101        "Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {ty:?} uses attachments with format {actual:?}",
102    )]
103    IncompatibleSampleCount {
104        expected: u32,
105        actual: u32,
106        ty: RenderPassCompatibilityCheckType,
107    },
108    #[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {ty:?} uses setting {actual:?}")]
109    IncompatibleMultiview {
110        expected: Option<NonZeroU32>,
111        actual: Option<NonZeroU32>,
112        ty: RenderPassCompatibilityCheckType,
113    },
114}
115
116impl RenderPassContext {
117    // Assumes the renderpass only contains one subpass
118    pub(crate) fn check_compatible(
119        &self,
120        other: &Self,
121        ty: RenderPassCompatibilityCheckType,
122    ) -> Result<(), RenderPassCompatibilityError> {
123        if self.attachments.colors != other.attachments.colors {
124            let indices = self
125                .attachments
126                .colors
127                .iter()
128                .zip(&other.attachments.colors)
129                .enumerate()
130                .filter_map(|(idx, (left, right))| (left != right).then_some(idx))
131                .collect();
132            return Err(RenderPassCompatibilityError::IncompatibleColorAttachment {
133                indices,
134                expected: self.attachments.colors.iter().cloned().collect(),
135                actual: other.attachments.colors.iter().cloned().collect(),
136                ty,
137            });
138        }
139        if self.attachments.depth_stencil != other.attachments.depth_stencil {
140            return Err(
141                RenderPassCompatibilityError::IncompatibleDepthStencilAttachment {
142                    expected: self.attachments.depth_stencil,
143                    actual: other.attachments.depth_stencil,
144                    ty,
145                },
146            );
147        }
148        if self.sample_count != other.sample_count {
149            return Err(RenderPassCompatibilityError::IncompatibleSampleCount {
150                expected: self.sample_count,
151                actual: other.sample_count,
152                ty,
153            });
154        }
155        if self.multiview != other.multiview {
156            return Err(RenderPassCompatibilityError::IncompatibleMultiview {
157                expected: self.multiview,
158                actual: other.multiview,
159                ty,
160            });
161        }
162        Ok(())
163    }
164}
165
/// A buffer-map callback paired with the result it must be invoked with.
pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult);

/// User callbacks that have become ready to run but have not yet been
/// invoked; they are collected here and fired later (see `fire`) so no
/// internal locks are held while user code executes.
#[derive(Default)]
pub struct UserClosures {
    /// Completed buffer-mapping operations awaiting their callbacks.
    pub mappings: Vec<BufferMapPendingClosure>,
    /// Ready `on_submitted_work_done` callbacks.
    pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>,
}
173
174impl UserClosures {
175    fn extend(&mut self, other: Self) {
176        self.mappings.extend(other.mappings);
177        self.submissions.extend(other.submissions);
178    }
179
180    fn fire(self) {
181        // Note: this logic is specifically moved out of `handle_mapping()` in order to
182        // have nothing locked by the time we execute users callback code.
183
184        // Mappings _must_ be fired before submissions, as the spec requires all mapping callbacks that are registered before
185        // a on_submitted_work_done callback to be fired before the on_submitted_work_done callback.
186        for (operation, status) in self.mappings {
187            operation.callback.call(status);
188        }
189        for closure in self.submissions {
190            closure.call();
191        }
192    }
193}
194
/// Map the byte range `offset..offset + size` of `buffer` into host memory
/// for access of kind `kind`.
///
/// Beyond the raw HAL mapping this also:
/// * invalidates the range for non-coherent `Read` mappings,
/// * records the range in `buffer.sync_mapped_writes` for non-coherent
///   `Write` mappings (the flush itself happens elsewhere), and
/// * zero-fills any not-yet-initialized parts of the range, flushing those
///   zeroes immediately when no later flush will cover them.
///
/// Returns a pointer to the start of the mapped range, or a
/// `BufferAccessError` when the HAL mapping fails.
fn map_buffer<A: hal::Api>(
    raw: &A::Device,
    buffer: &mut Buffer<A>,
    offset: BufferAddress,
    size: BufferAddress,
    kind: HostMap,
) -> Result<ptr::NonNull<u8>, BufferAccessError> {
    // `unwrap` assumes `buffer.raw` is still present (buffer not destroyed)
    // — presumably guaranteed by the caller; confirm at call sites.
    let mapping = unsafe {
        raw.map_buffer(buffer.raw.as_ref().unwrap(), offset..offset + size)
            .map_err(DeviceError::from)?
    };

    // Cache maintenance for non-coherent memory: reads need an immediate
    // invalidate; writes record a range that must be flushed later.
    buffer.sync_mapped_writes = match kind {
        HostMap::Read if !mapping.is_coherent => unsafe {
            raw.invalidate_mapped_ranges(
                buffer.raw.as_ref().unwrap(),
                iter::once(offset..offset + size),
            );
            None
        },
        HostMap::Write if !mapping.is_coherent => Some(offset..offset + size),
        _ => None,
    };

    // Mapping bounds must be copy-aligned; violating this is a caller bug.
    assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
    assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0);
    // Zero out uninitialized parts of the mapping. (Spec dictates all resources
    // behave as if they were initialized with zero)
    //
    // If this is a read mapping, ideally we would use a `clear_buffer` command
    // before reading the data from GPU (i.e. `invalidate_range`). However, this
    // would require us to kick off and wait for a command buffer or piggy back
    // on an existing one (the later is likely the only worthwhile option). As
    // reading uninitialized memory isn't a particular important path to
    // support, we instead just initialize the memory here and make sure it is
    // GPU visible, so this happens at max only once for every buffer region.
    //
    // If this is a write mapping zeroing out the memory here is the only
    // reasonable way as all data is pushed to GPU anyways.

    // No need to flush if it is flushed later anyways.
    let zero_init_needs_flush_now = mapping.is_coherent && buffer.sync_mapped_writes.is_none();
    // SAFETY assumption: the HAL mapping pointer is valid for `size` bytes.
    let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) };

    for uninitialized in buffer.initialization_status.drain(offset..(size + offset)) {
        // The mapping's pointer is already offset, however we track the
        // uninitialized range relative to the buffer's start.
        let fill_range =
            (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
        mapped[fill_range].fill(0);

        if zero_init_needs_flush_now {
            unsafe {
                raw.flush_mapped_ranges(buffer.raw.as_ref().unwrap(), iter::once(uninitialized))
            };
        }
    }

    Ok(mapping.ptr)
}
255
/// A simple free-list recycler for HAL command encoders.
struct CommandAllocator<A: hal::Api> {
    /// Encoders returned via `release_encoder`, kept for reuse.
    free_encoders: Vec<A::CommandEncoder>,
}
259
260impl<A: hal::Api> CommandAllocator<A> {
261    fn acquire_encoder(
262        &mut self,
263        device: &A::Device,
264        queue: &A::Queue,
265    ) -> Result<A::CommandEncoder, hal::DeviceError> {
266        match self.free_encoders.pop() {
267            Some(encoder) => Ok(encoder),
268            None => unsafe {
269                let hal_desc = hal::CommandEncoderDescriptor { label: None, queue };
270                device.create_command_encoder(&hal_desc)
271            },
272        }
273    }
274
275    fn release_encoder(&mut self, encoder: A::CommandEncoder) {
276        self.free_encoders.push(encoder);
277    }
278
279    fn dispose(self, device: &A::Device) {
280        log::info!("Destroying {} command encoders", self.free_encoders.len());
281        for cmd_encoder in self.free_encoders {
282            unsafe {
283                device.destroy_command_encoder(cmd_encoder);
284            }
285        }
286    }
287}
288
/// Error raised when a device handle does not refer to a valid device.
#[derive(Clone, Debug, Error)]
#[error("Device is invalid")]
pub struct InvalidDevice;
292
/// Errors attributable to the device itself rather than a specific
/// resource. The `Lost`, `OutOfMemory`, and `ResourceCreationFailed`
/// variants mirror `hal::DeviceError` (see the `From` impl below).
#[derive(Clone, Debug, Error)]
pub enum DeviceError {
    #[error("Parent device is invalid.")]
    Invalid,
    #[error("Parent device is lost")]
    Lost,
    #[error("Not enough memory left.")]
    OutOfMemory,
    #[error("Creation of a resource failed for a reason other than running out of memory.")]
    ResourceCreationFailed,
    // Guards against using a resource with a device other than its creator.
    #[error("Attempt to use a resource with a different device from the one that created it")]
    WrongDevice,
}
306
307impl From<hal::DeviceError> for DeviceError {
308    fn from(error: hal::DeviceError) -> Self {
309        match error {
310            hal::DeviceError::Lost => DeviceError::Lost,
311            hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory,
312            hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed,
313        }
314    }
315}
316
/// Error: the operation requires `wgt::Features` that were not enabled
/// when the device was created.
#[derive(Clone, Debug, Error)]
#[error("Features {0:?} are required but not enabled on the device")]
pub struct MissingFeatures(pub wgt::Features);
320
/// Error: the operation requires `wgt::DownlevelFlags` the device does not
/// support; the message appends the shared downlevel guidance text.
#[derive(Clone, Debug, Error)]
#[error(
    "Downlevel flags {0:?} are required but not supported on the device.\n{}",
    DOWNLEVEL_ERROR_MESSAGE
)]
pub struct MissingDownlevelFlags(pub wgt::DownlevelFlags);
327
/// Resolved ids for an implicitly created pipeline layout and its bind
/// group layouts (produced by `ImplicitPipelineIds::prepare`).
#[derive(Clone, Debug)]
#[cfg_attr(feature = "trace", derive(serde::Serialize))]
#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
pub struct ImplicitPipelineContext {
    /// Id of the implicit pipeline layout.
    pub root_id: id::PipelineLayoutId,
    /// Ids of the implicit bind group layouts, one per bind group slot.
    pub group_ids: ArrayVec<id::BindGroupLayoutId, { hal::MAX_BIND_GROUPS }>,
}
335
/// Caller-supplied identity inputs used to reserve ids for an implicit
/// pipeline layout and its bind group layouts.
pub struct ImplicitPipelineIds<'a, G: GlobalIdentityHandlerFactory> {
    /// Identity input for the pipeline layout id.
    pub root_id: Input<G, id::PipelineLayoutId>,
    /// Identity inputs for each bind group layout id.
    pub group_ids: &'a [Input<G, id::BindGroupLayoutId>],
}
340
341impl<G: GlobalIdentityHandlerFactory> ImplicitPipelineIds<'_, G> {
342    fn prepare<A: HalApi>(self, hub: &Hub<A, G>) -> ImplicitPipelineContext {
343        ImplicitPipelineContext {
344            root_id: hub.pipeline_layouts.prepare(self.root_id).into_id(),
345            group_ids: self
346                .group_ids
347                .iter()
348                .map(|id_in| hub.bind_group_layouts.prepare(id_in.clone()).into_id())
349                .collect(),
350        }
351    }
352}