wgpu_core/device/mod.rs

use alloc::{boxed::Box, string::String, vec::Vec};
use core::{fmt, num::NonZeroU32};

use crate::{
    binding_model,
    ray_tracing::BlasCompactReadyPendingClosure,
    resource::{
        Buffer, BufferAccessError, BufferAccessResult, BufferMapOperation, Labeled,
        RawResourceAccess, ResourceErrorIdent,
    },
    snatch::SnatchGuard,
    Label, DOWNLEVEL_ERROR_MESSAGE,
};

use arrayvec::ArrayVec;
use smallvec::SmallVec;
use thiserror::Error;
use wgt::{
    error::{ErrorType, WebGpuError},
    BufferAddress, DeviceLostReason, TextureFormat,
};

pub(crate) mod bgl;
pub mod global;
mod life;
pub mod queue;
pub mod ray_tracing;
pub mod resource;
#[cfg(any(feature = "trace", feature = "replay"))]
pub mod trace;
pub use {life::WaitIdleError, resource::Device};

pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
// Should be large enough for the largest possible texture row. This value is
// enough for a 16k texture with a float4 (16 bytes per texel) format:
// 16384 texels * 16 bytes = 256 KiB, which fits within the 512 KiB below.
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;

pub(crate) const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";

pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;

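/// How the host will access a mapped buffer: for reading or for writing.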
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum HostMap {
    Read,
    Write,
}

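/// Per-attachment data for a render pass: the color attachments, their
/// resolve targets, and the optional depth-stencil attachment.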
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct AttachmentData<T> {
    pub colors: ArrayVec<Option<T>, { hal::MAX_COLOR_ATTACHMENTS }>,
    pub resolves: ArrayVec<T, { hal::MAX_COLOR_ATTACHMENTS }>,
    pub depth_stencil: Option<T>,
}
impl<T: PartialEq> Eq for AttachmentData<T> {}

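/// The attachment formats, sample count, and multiview state that a render
/// pass and any pipeline used within it must agree on.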
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct RenderPassContext {
    pub attachments: AttachmentData<TextureFormat>,
    pub sample_count: u32,
    pub multiview: Option<NonZeroU32>,
}
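
/// Returned by [`RenderPassContext::check_compatible`] when a resource's
/// render pass context does not match the render pass it is used with.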
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum RenderPassCompatibilityError {
    #[error(
        "Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {res} uses attachments with formats {actual:?}",
    )]
    IncompatibleColorAttachment {
        indices: Vec<usize>,
        expected: Vec<Option<TextureFormat>>,
        actual: Vec<Option<TextureFormat>>,
        res: ResourceErrorIdent,
    },
    #[error(
        "Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {res} uses an attachment with format {actual:?}",
    )]
    IncompatibleDepthStencilAttachment {
        expected: Option<TextureFormat>,
        actual: Option<TextureFormat>,
        res: ResourceErrorIdent,
    },
    #[error(
        "Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {res} uses attachments with sample count {actual:?}",
    )]
    IncompatibleSampleCount {
        expected: u32,
        actual: u32,
        res: ResourceErrorIdent,
    },
    #[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {res} uses setting {actual:?}")]
    IncompatibleMultiview {
        expected: Option<NonZeroU32>,
        actual: Option<NonZeroU32>,
        res: ResourceErrorIdent,
    },
}

impl WebGpuError for RenderPassCompatibilityError {
    fn webgpu_error_type(&self) -> ErrorType {
        ErrorType::Validation
    }
}

impl RenderPassContext {
    // Assumes the render pass only contains one subpass.
    pub(crate) fn check_compatible<T: Labeled>(
        &self,
        other: &Self,
        res: &T,
    ) -> Result<(), RenderPassCompatibilityError> {
        if self.attachments.colors != other.attachments.colors {
            let indices = self
                .attachments
                .colors
                .iter()
                .zip(&other.attachments.colors)
                .enumerate()
                .filter_map(|(idx, (left, right))| (left != right).then_some(idx))
                .collect();
            return Err(RenderPassCompatibilityError::IncompatibleColorAttachment {
                indices,
                expected: self.attachments.colors.iter().cloned().collect(),
                actual: other.attachments.colors.iter().cloned().collect(),
                res: res.error_ident(),
            });
        }
        if self.attachments.depth_stencil != other.attachments.depth_stencil {
            return Err(
                RenderPassCompatibilityError::IncompatibleDepthStencilAttachment {
                    expected: self.attachments.depth_stencil,
                    actual: other.attachments.depth_stencil,
                    res: res.error_ident(),
                },
            );
        }
        if self.sample_count != other.sample_count {
            return Err(RenderPassCompatibilityError::IncompatibleSampleCount {
                expected: self.sample_count,
                actual: other.sample_count,
                res: res.error_ident(),
            });
        }
        if self.multiview != other.multiview {
            return Err(RenderPassCompatibilityError::IncompatibleMultiview {
                expected: self.multiview,
                actual: other.multiview,
                res: res.error_ident(),
            });
        }
        Ok(())
    }
}
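
/// A queued buffer-map callback paired with the mapping result it will report
/// when fired.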
pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult);
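
/// Closures gathered while the device lock is held, to be fired once it has
/// been released: buffer-map callbacks, BLAS-compaction callbacks,
/// submitted-work-done callbacks, and device-lost handlers.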
#[derive(Default)]
pub struct UserClosures {
    pub mappings: Vec<BufferMapPendingClosure>,
    pub blas_compact_ready: Vec<BlasCompactReadyPendingClosure>,
    pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>,
    pub device_lost_invocations: SmallVec<[DeviceLostInvocation; 1]>,
}

impl UserClosures {
    fn extend(&mut self, other: Self) {
        self.mappings.extend(other.mappings);
        self.blas_compact_ready.extend(other.blas_compact_ready);
        self.submissions.extend(other.submissions);
        self.device_lost_invocations
            .extend(other.device_lost_invocations);
    }

    fn fire(self) {
        // Note: this logic is specifically moved out of `handle_mapping()` so that
        // nothing is locked by the time we execute the user's callback code.

        // Mappings _must_ be fired before submissions: the spec requires every
        // mapping callback registered before an on_submitted_work_done callback
        // to be fired before that on_submitted_work_done callback.
        for (mut operation, status) in self.mappings {
            if let Some(callback) = operation.callback.take() {
                callback(status);
            }
        }
        for (mut operation, status) in self.blas_compact_ready {
            if let Some(callback) = operation.take() {
                callback(status);
            }
        }
        for closure in self.submissions {
            closure();
        }
        for invocation in self.device_lost_invocations {
            (invocation.closure)(invocation.reason, invocation.message);
        }
    }
}
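
// User-supplied callback invoked when the device is lost; it receives the
// `DeviceLostReason` and a human-readable message.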
#[cfg(send_sync)]
pub type DeviceLostClosure = Box<dyn FnOnce(DeviceLostReason, String) + Send + 'static>;
#[cfg(not(send_sync))]
pub type DeviceLostClosure = Box<dyn FnOnce(DeviceLostReason, String) + 'static>;
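
/// A pending [`DeviceLostClosure`] invocation, with the reason and message to
/// pass when it is called.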
pub struct DeviceLostInvocation {
    closure: DeviceLostClosure,
    reason: DeviceLostReason,
    message: String,
}
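
/// Map `size` bytes of `buffer` starting at `offset` for host access of the
/// given `kind`, zeroing any parts of the mapped range that have not yet been
/// initialized. Both `offset` and `size` must be multiples of
/// `wgt::COPY_BUFFER_ALIGNMENT`, as asserted below.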
pub(crate) fn map_buffer(
    buffer: &Buffer,
    offset: BufferAddress,
    size: BufferAddress,
    kind: HostMap,
    snatch_guard: &SnatchGuard,
) -> Result<hal::BufferMapping, BufferAccessError> {
    let raw_device = buffer.device.raw();
    let raw_buffer = buffer.try_raw(snatch_guard)?;
    let mapping = unsafe {
        raw_device
            .map_buffer(raw_buffer, offset..offset + size)
            .map_err(|e| buffer.device.handle_hal_error(e))?
    };

    if !mapping.is_coherent && kind == HostMap::Read {
        #[allow(clippy::single_range_in_vec_init)]
        unsafe {
            raw_device.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
        }
    }

    assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
    assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0);
    // Zero out uninitialized parts of the mapping. (The spec dictates that all
    // resources behave as if they were initialized with zero.)
    //
    // If this is a read mapping, ideally we would use a `clear_buffer` command
    // before reading the data from the GPU (i.e. `invalidate_range`). However,
    // this would require us to kick off and wait for a command buffer, or to
    // piggyback on an existing one (the latter is likely the only worthwhile
    // option). As reading uninitialized memory isn't a particularly important
    // path to support, we instead just initialize the memory here and make
    // sure it is GPU visible, so this happens at most once for every buffer
    // region.
    //
    // If this is a write mapping, zeroing out the memory here is the only
    // reasonable option, as all data is pushed to the GPU anyway.

    let mapped = unsafe { core::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) };

    // We can't call `flush_mapped_ranges` in this case, so we can't drain the
    // uninitialized ranges either.
    if !mapping.is_coherent
        && kind == HostMap::Read
        && !buffer.usage.contains(wgt::BufferUsages::MAP_WRITE)
    {
        for uninitialized in buffer
            .initialization_status
            .write()
            .uninitialized(offset..(size + offset))
        {
            // The mapping's pointer is already offset, but we track the
            // uninitialized range relative to the buffer's start.
            let fill_range =
                (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
            mapped[fill_range].fill(0);
        }
    } else {
        for uninitialized in buffer
            .initialization_status
            .write()
            .drain(offset..(size + offset))
        {
            // The mapping's pointer is already offset, but we track the
            // uninitialized range relative to the buffer's start.
            let fill_range =
                (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
            mapped[fill_range].fill(0);

            // NOTE: This is only possible when MAPPABLE_PRIMARY_BUFFERS is enabled.
            if !mapping.is_coherent
                && kind == HostMap::Read
                && buffer.usage.contains(wgt::BufferUsages::MAP_WRITE)
            {
                unsafe { raw_device.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
            }
        }
    }

    Ok(mapping)
}
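
/// A resource was used with a device other than the one it was created on.
/// Records the resource, its device, and the mismatched target for the error
/// message.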
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DeviceMismatch {
    pub(super) res: ResourceErrorIdent,
    pub(super) res_device: ResourceErrorIdent,
    pub(super) target: Option<ResourceErrorIdent>,
    pub(super) target_device: ResourceErrorIdent,
}

impl fmt::Display for DeviceMismatch {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        write!(
            f,
            "{} of {} doesn't match {}",
            self.res_device, self.res, self.target_device
        )?;
        if let Some(target) = self.target.as_ref() {
            write!(f, " of {target}")?;
        }
        Ok(())
    }
}

impl core::error::Error for DeviceMismatch {}

impl WebGpuError for DeviceMismatch {
    fn webgpu_error_type(&self) -> ErrorType {
        ErrorType::Validation
    }
}

#[derive(Clone, Debug, Error)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum DeviceError {
    #[error("Parent device is lost")]
    Lost,
    #[error("Not enough memory left.")]
    OutOfMemory,
    #[error(transparent)]
    DeviceMismatch(#[from] Box<DeviceMismatch>),
}

impl WebGpuError for DeviceError {
    fn webgpu_error_type(&self) -> ErrorType {
        match self {
            Self::DeviceMismatch(e) => e.webgpu_error_type(),
            Self::Lost => ErrorType::DeviceLost,
            Self::OutOfMemory => ErrorType::OutOfMemory,
        }
    }
}

impl DeviceError {
    /// Only use this function in contexts where there is no `Device`.
    ///
    /// Use [`Device::handle_hal_error`] otherwise.
    pub fn from_hal(error: hal::DeviceError) -> Self {
        match error {
            hal::DeviceError::Lost => Self::Lost,
            hal::DeviceError::OutOfMemory => Self::OutOfMemory,
            // An unexpected HAL error is surfaced to the user as a device loss.
            hal::DeviceError::Unexpected => Self::Lost,
        }
    }
}

#[derive(Clone, Debug, Error)]
#[error("Features {0:?} are required but not enabled on the device")]
pub struct MissingFeatures(pub wgt::Features);

impl WebGpuError for MissingFeatures {
    fn webgpu_error_type(&self) -> ErrorType {
        ErrorType::Validation
    }
}

#[derive(Clone, Debug, Error)]
#[error(
    "Downlevel flags {0:?} are required but not supported on the device.\n{DOWNLEVEL_ERROR_MESSAGE}",
)]
pub struct MissingDownlevelFlags(pub wgt::DownlevelFlags);

impl WebGpuError for MissingDownlevelFlags {
    fn webgpu_error_type(&self) -> ErrorType {
        ErrorType::Validation
    }
}

/// Create a validator for Naga [`Module`]s.
///
/// Create a Naga [`Validator`] that ensures that each [`naga::Module`]
/// presented to it is valid, and uses no features not included in
/// `features` and `downlevel`.
///
/// The validator can only catch invalid modules and feature misuse
/// reliably when the `flags` argument includes all the flags in
/// [`ValidationFlags::default()`].
///
/// [`Validator`]: naga::valid::Validator
/// [`Module`]: naga::Module
/// [`ValidationFlags::default()`]: naga::valid::ValidationFlags::default
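///
/// A minimal usage sketch (the inputs are illustrative; a real caller would
/// pass the device's actual feature and downlevel sets):
///
/// ```ignore
/// let mut validator = create_validator(
///     wgt::Features::empty(),
///     wgt::DownlevelFlags::empty(),
///     naga::valid::ValidationFlags::default(),
/// );
/// // An empty module exercises no capabilities, so validation succeeds.
/// let info = validator
///     .validate(&naga::Module::default())
///     .expect("empty module should validate");
/// ```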
pub fn create_validator(
    features: wgt::Features,
    downlevel: wgt::DownlevelFlags,
    flags: naga::valid::ValidationFlags,
) -> naga::valid::Validator {
    use naga::valid::Capabilities as Caps;
    let mut caps = Caps::empty();
    caps.set(
        Caps::PUSH_CONSTANT,
        features.contains(wgt::Features::PUSH_CONSTANTS),
    );
    caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
    caps.set(
        Caps::SHADER_FLOAT16,
        features.contains(wgt::Features::SHADER_F16),
    );
    caps.set(
        Caps::SHADER_FLOAT16_IN_FLOAT32,
        downlevel.contains(wgt::DownlevelFlags::SHADER_F16_IN_F32),
    );
    caps.set(
        Caps::PRIMITIVE_INDEX,
        features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
    );
    caps.set(
        Caps::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
        features
            .contains(wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING),
    );
    caps.set(
        Caps::STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING,
        features.contains(wgt::Features::STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING),
    );
    caps.set(
        Caps::UNIFORM_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
        features.contains(wgt::Features::UNIFORM_BUFFER_BINDING_ARRAYS),
    );
    // TODO: This needs a proper wgpu feature
    caps.set(
        Caps::SAMPLER_NON_UNIFORM_INDEXING,
        features
            .contains(wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING),
    );
    caps.set(
        Caps::STORAGE_TEXTURE_16BIT_NORM_FORMATS,
        features.contains(wgt::Features::TEXTURE_FORMAT_16BIT_NORM),
    );
    caps.set(Caps::MULTIVIEW, features.contains(wgt::Features::MULTIVIEW));
    caps.set(
        Caps::EARLY_DEPTH_TEST,
        features.contains(wgt::Features::SHADER_EARLY_DEPTH_TEST),
    );
    caps.set(
        Caps::SHADER_INT64,
        features.contains(wgt::Features::SHADER_INT64),
    );
    caps.set(
        Caps::SHADER_INT64_ATOMIC_MIN_MAX,
        features.intersects(
            wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS,
        ),
    );
    caps.set(
        Caps::SHADER_INT64_ATOMIC_ALL_OPS,
        features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS),
    );
    caps.set(
        Caps::TEXTURE_ATOMIC,
        features.contains(wgt::Features::TEXTURE_ATOMIC),
    );
    caps.set(
        Caps::TEXTURE_INT64_ATOMIC,
        features.contains(wgt::Features::TEXTURE_INT64_ATOMIC),
    );
    caps.set(
        Caps::SHADER_FLOAT32_ATOMIC,
        features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC),
    );
    caps.set(
        Caps::MULTISAMPLED_SHADING,
        downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING),
    );
    caps.set(
        Caps::DUAL_SOURCE_BLENDING,
        features.contains(wgt::Features::DUAL_SOURCE_BLENDING),
    );
    caps.set(
        Caps::CLIP_DISTANCE,
        features.contains(wgt::Features::CLIP_DISTANCES),
    );
    caps.set(
        Caps::CUBE_ARRAY_TEXTURES,
        downlevel.contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES),
    );
    caps.set(
        Caps::SUBGROUP,
        features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX),
    );
    caps.set(
        Caps::SUBGROUP_BARRIER,
        features.intersects(wgt::Features::SUBGROUP_BARRIER),
    );
    caps.set(
        Caps::RAY_QUERY,
        features.intersects(wgt::Features::EXPERIMENTAL_RAY_QUERY),
    );
    caps.set(
        Caps::SUBGROUP_VERTEX_STAGE,
        features.contains(wgt::Features::SUBGROUP_VERTEX),
    );
    caps.set(
        Caps::RAY_HIT_VERTEX_POSITION,
        features.intersects(wgt::Features::EXPERIMENTAL_RAY_HIT_VERTEX_RETURN),
    );
    caps.set(
        Caps::TEXTURE_EXTERNAL,
        features.intersects(wgt::Features::EXTERNAL_TEXTURE),
    );

    naga::valid::Validator::new(flags, caps)
}