Skip to main content

vyre_wgpu/engine/decompress/
mod.rs

1//! Bulk GPU decompression pipeline using the vyre runtime.
2//!
3//! NOTE: This is a host-side workflow dispatcher, not an IR op domain. It
4//! provides format-specific GPU decompression entry points that parse
5//! compressed frames, dispatch the appropriate kernel, and return the
6//! decompressed payload.
7
8pub(crate) mod dispatch_kernel;
/// Format-specific decompression entry points (LZ4 and Zstandard).
10pub mod formats;
11pub(crate) mod uniforms;
12
13use vyre::error::{Error, Result};
14
15pub use self::formats::lz4::{dispatch_lz4, Lz4DispatchArgs};
16pub use self::formats::zstd::{dispatch_zstd, ZstdDispatchArgs};
17pub use self::uniforms::{Lz4Uniforms, ZstdUniforms};
18
/// Hard ceiling, in bytes, on the decompressed output accepted by a single
/// GPU decompression call (256 MiB).
pub const MAX_DECOMPRESS_OUTPUT_BYTES: usize = 256 << 20;
21
/// Size limits applied to a GPU decompression request.
///
/// The type is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal; start from [`Default`] and adjust the public
/// fields instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct DecompressLimits {
    /// Largest compressed input, in bytes, that will be accepted.
    pub max_input_size: usize,
    /// Largest decompressed output, in bytes, that will be accepted.
    pub max_output_size: usize,
}
31
32impl Default for DecompressLimits {
33    fn default() -> Self {
34        Self {
35            max_input_size: 64 * 1024 * 1024,
36            max_output_size: MAX_DECOMPRESS_OUTPUT_BYTES,
37        }
38    }
39}
40
41/// Validate declared decompression expansion before GPU dispatch.
42///
43/// # Errors
44///
45/// Returns `Error::Decompress` when the declared decompressed byte count exceeds
46/// `max_output_ratio * compressed_len`.
47pub fn validate_output_ratio(
48    format: &str,
49    compressed_len: usize,
50    declared_output_len: usize,
51    max_output_ratio: usize,
52) -> Result<()> {
53    if declared_output_len == 0 {
54        return Ok(());
55    }
56    if compressed_len == 0 {
57        return Err(Error::Decompress {
58            message: format!(
59                "{format} declared {declared_output_len} output bytes from an empty compressed input. Fix: reject empty-input decompression bombs before GPU dispatch."
60            ),
61        });
62    }
63    let max_output = compressed_len
64        .checked_mul(max_output_ratio)
65        .ok_or_else(|| Error::Decompress {
66            message: format!(
67                "{format} output-ratio limit overflowed for compressed length {compressed_len} and ratio {max_output_ratio}. Fix: split the compressed input before GPU dispatch."
68            ),
69        })?;
70    if declared_output_len > max_output {
71        return Err(Error::Decompress {
72            message: format!(
73                "{format} declared {declared_output_len} output bytes for {compressed_len} compressed bytes, exceeding max_output_ratio {max_output_ratio}. Fix: reject the decompression bomb or lower the declared output size."
74            ),
75        });
76    }
77    Ok(())
78}
79
80pub(crate) fn validate_backend_capacity(
81    device: &wgpu::Device,
82    compressed_len: usize,
83    descriptor_words: usize,
84    output_words: usize,
85    status_words: usize,
86    uniform_words: usize,
87) -> Result<()> {
88    let limits = device.limits();
89    let storage_binding_limit = u64::from(limits.max_storage_buffer_binding_size);
90    for (name, bytes) in [
91        ("compressed input", align_to_copy(u64::try_from(compressed_len).map_err(|source| Error::Gpu {
92            message: format!("compressed input length cannot fit u64: {source}. Fix: split the compressed input before GPU dispatch."),
93        })?)),
94        ("descriptor buffer", bytes_for_u32s(descriptor_words)?),
95        ("output buffer", bytes_for_u32s(output_words)?),
96        ("status buffer", bytes_for_u32s(status_words)?),
97        ("uniform buffer", bytes_for_u32s(uniform_words)?),
98    ] {
99        if bytes > limits.max_buffer_size || bytes > storage_binding_limit {
100            return Err(Error::Gpu {
101                message: format!(
102                    "{name} requires {bytes} bytes, exceeding the adapter storage-buffer limit. Fix: split the input or use a GPU with larger storage buffers."
103                ),
104            });
105        }
106    }
107    Ok(())
108}
109
110pub(crate) fn unpack_words_to_bytes(words: &[u32], byte_len: usize) -> Result<Vec<u8>> {
111    if byte_len > MAX_DECOMPRESS_OUTPUT_BYTES {
112        return Err(Error::Decompress {
113            message: format!(
114                "decompressed output is {byte_len} bytes, exceeding {MAX_DECOMPRESS_OUTPUT_BYTES}. Fix: split the payload or lower declared output size."
115            ),
116        });
117    }
118    let available = words
119        .len()
120        .checked_mul(4)
121        .ok_or_else(|| Error::Decompress {
122            message: "readback capacity overflowed usize. Fix: split the decompression workload."
123                .to_string(),
124        })?;
125    if byte_len > available {
126        return Err(Error::Decompress {
127            message: format!(
128                "declared decompressed output {byte_len} bytes exceeds readback capacity {available}. Fix: reject this malformed decompression descriptor."
129            ),
130        });
131    }
132    let mut bytes = vec![0_u8; byte_len];
133    for (index, byte) in bytes.iter_mut().enumerate() {
134        *byte = ((words[index / 4] >> ((index % 4) * 8)) & 0xff) as u8;
135    }
136    Ok(bytes)
137}
138
139pub(crate) fn decode_u32s(bytes: &[u8]) -> Result<Vec<u32>> {
140    if bytes.len() % 4 != 0 {
141        return Err(Error::Gpu {
142            message: format!(
143                "GPU readback length {} is not divisible by 4. Fix: check decompression buffer sizing.",
144                bytes.len()
145            ),
146        });
147    }
148    Ok(bytes
149        .chunks_exact(4)
150        .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
151        .collect())
152}
153
154pub(crate) fn bytes_for_u32s(words: usize) -> Result<u64> {
155    let bytes = words.checked_mul(4).ok_or_else(|| Error::Gpu {
156        message: "buffer size overflow. Fix: split the decompression workload before GPU dispatch."
157            .to_string(),
158    })?;
159    u64::try_from(bytes.max(4)).map_err(|source| Error::Gpu {
160        message: format!("buffer byte length cannot fit u64: {source}. Fix: split the workload."),
161    })
162}
163
164pub(crate) fn words_for_output_bytes(format: &str, bytes: usize) -> Result<usize> {
165    if bytes > MAX_DECOMPRESS_OUTPUT_BYTES {
166        return Err(Error::Decompress {
167            message: format!(
168                "{format} decompressed output is {bytes} bytes, exceeding {MAX_DECOMPRESS_OUTPUT_BYTES}. Fix: split the compressed stream before GPU dispatch."
169            ),
170        });
171    }
172    Ok(bytes.div_ceil(4).max(1))
173}
174
175pub(crate) fn u32_output_len(format: &str, bytes: usize) -> Result<u32> {
176    u32::try_from(bytes).map_err(|source| Error::Decompress {
177        message: format!(
178            "{format} decompressed output length {bytes} cannot fit u32 shader uniforms: {source}. Fix: split the compressed stream before GPU dispatch."
179        ),
180    })
181}
182
183pub(crate) fn align_to_copy(size: u64) -> u64 {
184    let alignment = wgpu::COPY_BUFFER_ALIGNMENT;
185    size.div_ceil(alignment).max(1) * alignment
186}
187
188pub(crate) fn binding(binding: u32, buffer: &wgpu::Buffer) -> wgpu::BindGroupEntry<'_> {
189    wgpu::BindGroupEntry {
190        binding,
191        resource: buffer.as_entire_binding(),
192    }
193}