Skip to main content

vyre_driver/program_walks/
outputs.rs

1//! Output-buffer readback layout and budget checks.
2
3use std::sync::Arc;
4
5use vyre_foundation::ir::{BufferDecl, DataType, Program};
6
7use crate::backend::{BackendError, DispatchConfig};
8
9/// Enforces [`DispatchConfig::max_output_bytes`] against materialized readback buffers.
10///
11/// # Errors
12///
13/// Returns when the summed output length exceeds the configured cap.
14pub fn enforce_actual_output_budget(
15    config: &DispatchConfig,
16    outputs: &[Vec<u8>],
17) -> Result<(), BackendError> {
18    let Some(limit) = config.max_output_bytes else {
19        return Ok(());
20    };
21    let actual = outputs.iter().try_fold(0usize, |sum, output| {
22        sum.checked_add(output.len()).ok_or_else(|| {
23            BackendError::new(
24                "actual readback size overflows usize. Fix: split the Program output before dispatch.",
25            )
26        })
27    })?;
28    if actual > limit {
29        return Err(BackendError::new(format!(
30            "actual readback size {actual} exceeds DispatchConfig.max_output_bytes {limit}. Fix: narrow BufferDecl::output_byte_range or raise max_output_bytes."
31        )));
32    }
33    Ok(())
34}
35
/// Byte-level plan for reading one output buffer back from the GPU.
///
/// The consumer asks for a byte range; the copy window around that range is
/// widened to 32-bit word boundaries, and `trim_start` records where the
/// requested bytes begin inside the copied window.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct OutputLayout {
    /// Byte size of the whole output buffer on the GPU, before range narrowing.
    pub full_size: usize,
    /// Number of bytes handed back to the consumer (length of the requested range).
    pub read_size: usize,
    /// Start of the copy window in the GPU buffer: the range start rounded down to a multiple of 4.
    pub copy_offset: usize,
    /// Byte length of the copy window / staging buffer; never smaller than 4.
    pub copy_size: usize,
    /// Distance from the start of the copy window to the first requested byte.
    pub trim_start: usize,
}
50
51/// Readback and allocation metadata for one writable buffer.
52#[derive(Clone, Debug)]
53pub struct OutputBindingLayout {
54    /// Buffer binding slot.
55    pub binding: u32,
56    /// Buffer name for diagnostics.
57    pub name: Arc<str>,
58    /// Full readback/copy layout for this binding.
59    pub layout: OutputLayout,
60    /// Rounded-up 32-bit word count used for allocation and clears.
61    pub word_count: usize,
62}
63
64/// Derive output readback layout for a program.
65///
66/// # Errors
67///
68/// Returns a backend error when the program has no output buffer or declares
69/// an out-of-bounds output byte range.
70pub fn output_layout_from_program(program: &Program) -> Result<OutputLayout, BackendError> {
71    let Some(&index) = program.output_buffer_indices().first() else {
72        return Err(BackendError::new(
73            "program has no output buffer. Fix: declare exactly one output buffer in the vyre Program.",
74        ));
75    };
76    let output = program.buffers().get(index as usize).ok_or_else(|| {
77        BackendError::new(format!(
78            "output buffer index {index} is out of bounds. Fix: rebuild the Program so writable buffer metadata stays consistent."
79        ))
80    })?;
81    output_binding_layout(output).map(|output| output.layout)
82}
83
84/// All output-buffer binding layouts for `program`, in declaration order.
85///
86/// # Errors
87///
88/// Returns when there is no output buffer, an index is invalid, or layout
89/// math fails.
90pub fn output_binding_layouts(program: &Program) -> Result<Vec<OutputBindingLayout>, BackendError> {
91    let mut outputs = reserved_output_layout_slots(program.output_buffer_indices().len())?;
92    output_binding_layouts_into(program, &mut outputs)?;
93    Ok(outputs)
94}
95
96/// Write output-buffer binding layouts into caller-owned storage.
97///
98/// # Errors
99///
100/// Returns when there is no output buffer, an index is invalid, or layout
101/// math fails.
102pub fn output_binding_layouts_into(
103    program: &Program,
104    outputs: &mut Vec<OutputBindingLayout>,
105) -> Result<(), BackendError> {
106    outputs.clear();
107    reserve_output_layout_slots(outputs, program.output_buffer_indices().len())?;
108    for &index in program.output_buffer_indices() {
109        let output = program.buffers().get(index as usize).ok_or_else(|| {
110            BackendError::new(
111                format!(
112                    "output buffer index {index} is out of bounds. Fix: rebuild the Program so writable buffer metadata stays consistent."
113                ),
114            )
115        })?;
116        outputs.push(output_binding_layout(output)?);
117    }
118    if outputs.is_empty() {
119        return Err(BackendError::new(
120            "program has no output buffer. Fix: declare at least one writable buffer in the vyre Program.",
121        ));
122    }
123    Ok(())
124}
125
126/// Per-output binding layout for a single declared output buffer.
127///
128/// # Errors
129///
130/// Returns when counts, element size, or declared byte range are inconsistent.
131pub fn output_binding_layout(output: &BufferDecl) -> Result<OutputBindingLayout, BackendError> {
132    let count = usize::try_from(output.count()).map_err(|_| {
133        BackendError::new(
134            "program output element count exceeds usize. Fix: split the dispatch into smaller output buffers.",
135        )
136    })?;
137    output.element.validate_layout().map_err(|error| {
138        BackendError::new(format!(
139            "program output `{}` has malformed data-type layout metadata: {error}",
140            output.name()
141        ))
142    })?;
143    let full_size = output.element.packed_size_bytes(count).map_err(|error| {
144        BackendError::new(format!(
145            "program output `{}` byte size could not be computed: {error}",
146            output.name()
147        ))
148    })?.ok_or_else(|| {
149        BackendError::new(
150            "program output element type has no fixed packed byte size. Fix: validate the Program and flatten variable-size outputs before backend pipeline compilation.",
151        )
152    })?;
153    let layout = output_layout(output, full_size)?;
154    let word_count = full_size
155        .checked_add(3)
156        .and_then(|n| n.checked_div(4))
157        .ok_or_else(|| {
158            BackendError::new(
159                "program output word count overflows usize. Fix: split the dispatch into smaller output buffers.",
160            )
161        })?
162        .max(1);
163    Ok(OutputBindingLayout {
164        binding: output.binding(),
165        name: Arc::clone(&output.name),
166        layout,
167        word_count,
168    })
169}
170
171fn output_layout(output: &BufferDecl, full_size: usize) -> Result<OutputLayout, BackendError> {
172    let range = output.output_byte_range().unwrap_or(0..full_size);
173    if range.start > range.end || range.end > full_size {
174        return Err(BackendError::new(format!(
175            "output byte range {:?} is outside output buffer size {full_size}. Fix: declare a range within the output buffer.",
176            range
177        )));
178    }
179    let copy_offset = range.start & !3;
180    let copy_end = align_up_to_u32_word(range.end)?.min(full_size.max(4));
181    let copy_size = copy_end.checked_sub(copy_offset).ok_or_else(|| {
182        BackendError::new(format!(
183            "aligned output copy range underflowed: copy_end={copy_end}, copy_offset={copy_offset}. Fix: declare output_byte_range inside the output buffer."
184        ))
185    })?.max(4);
186    Ok(OutputLayout {
187        full_size,
188        read_size: range.end - range.start,
189        copy_offset,
190        copy_size,
191        trim_start: range.start - copy_offset,
192    })
193}
194
195fn reserve_output_layout_slots(
196    outputs: &mut Vec<OutputBindingLayout>,
197    capacity: usize,
198) -> Result<(), BackendError> {
199    crate::allocation::try_reserve_vec_to_capacity(outputs, capacity).map_err(|error| {
200        BackendError::new(format!(
201            "output binding layout planning could not reserve {capacity} output slot(s): {error}. Fix: split the Program output set or reuse caller-owned output layout scratch."
202        ))
203    })
204}
205
206fn reserved_output_layout_slots(capacity: usize) -> Result<Vec<OutputBindingLayout>, BackendError> {
207    let mut outputs = Vec::new();
208    reserve_output_layout_slots(&mut outputs, capacity)?;
209    Ok(outputs)
210}
211
212fn align_up_to_u32_word(value: usize) -> Result<usize, BackendError> {
213    value.checked_add(3).map(|end| end & !3).ok_or_else(|| {
214        BackendError::new(format!(
215            "aligned output copy end overflows usize for byte offset {value}. Fix: declare a smaller output_byte_range before backend readback planning."
216        ))
217    })
218}
219
#[cfg(test)]
mod tests {
    use super::*;
    use vyre_foundation::ir::{BufferDecl, DataType};

    /// Scans this file's production section (everything before the test
    /// module attribute) for helpers that must exist and constructs that must not.
    #[test]
    fn output_layout_planning_uses_fallible_modular_reservation_and_alignment() {
        let production = include_str!("outputs.rs")
            .split("#[cfg(test)]")
            .next()
            .expect("Fix: output-layout source must contain production section before tests");

        let required = [
            "fn reserve_output_layout_slots",
            "fn align_up_to_u32_word",
            "try_reserve_vec_to_capacity",
        ];
        assert!(
            required.iter().all(|&needle| production.contains(needle)),
            "Fix: output layout planning must keep reservation and alignment as modular fallible helpers."
        );

        let forbidden = [
            "Vec::with_capacity",
            ".reserve(program.output_buffer_indices().len())",
            ".next_multiple_of(4)",
            ".unwrap_or(full_size)",
        ];
        assert!(
            !forbidden.iter().any(|&needle| production.contains(needle)),
            "Fix: output layout planning must not allocate infallibly or hide overflow in release paths."
        );
    }

    /// Aligning the largest `usize` upward must fail with an actionable message.
    #[test]
    fn output_layout_alignment_rejects_usize_overflow() {
        let message = align_up_to_u32_word(usize::MAX)
            .expect_err("max byte offset cannot align upward")
            .to_string();
        assert!(
            message.contains("Fix:"),
            "alignment overflow must be actionable: {message}"
        );
    }

    /// Three packed I4 elements occupy two bytes and a single word.
    #[test]
    fn output_layout_uses_packed_size_for_subbyte_elements() {
        let declaration = BufferDecl::output("packed_i4", 0, DataType::I4).with_count(3);
        let OutputBindingLayout {
            layout, word_count, ..
        } = output_binding_layout(&declaration)
            .expect("Fix: packed I4 output layout should use packed byte sizing");

        assert_eq!(layout.full_size, 2);
        assert_eq!(layout.read_size, 2);
        assert_eq!(word_count, 1);
    }

    /// A BSR block with zero rows must be rejected before layout planning.
    #[test]
    fn output_layout_rejects_malformed_data_type_layouts() {
        let malformed = DataType::SparseBsr {
            element: Box::new(DataType::F32),
            block_rows: 0,
            block_cols: 4,
        };
        let declaration = BufferDecl::output("bad_bsr", 0, malformed).with_count(1);

        let error = output_binding_layout(&declaration)
            .expect_err("zero-height BSR blocks must not enter output planning");
        let rendered = error.to_string();
        assert!(
            rendered.contains("SparseBsr block_rows must be > 0"),
            "Fix: malformed output data-type layout diagnostics must remain actionable: {error}"
        );
    }
}
292
293/// Fixed scalar element size in bytes for [`DataType`].
294///
295/// # Errors
296///
297/// Returns when the type has no fixed size (e.g. unsized or dynamic).
298pub fn element_size_bytes(data_type: &DataType) -> Result<usize, BackendError> {
299    data_type.size_bytes().ok_or_else(|| {
300        BackendError::new(
301            "output buffer element type has no fixed scalar element size. Fix: validate the Program and flatten variable-size outputs before backend pipeline compilation.",
302        )
303    })
304}