Skip to main content

vyre_driver/backend/
dispatch_result.rs

1//! Dispatch output payloads shared by every backend.
2
/// Output of one dispatch: a vector per output buffer slot, each
/// vector holding the raw bytes read back from the GPU.
///
/// Consumers decode the bytes per the Program's output buffer
/// declarations. The outer vec is indexed in the same order as the
/// Program's `is_output: true` buffers; each inner vec is the undecoded
/// byte payload for one of those buffers.
pub type OutputBuffers = Vec<Vec<u8>>;
9
/// Per-slot outcome counters reported by an output-buffer replacement.
///
/// Every field is a count of slots; the all-zero [`Default`] value describes
/// a replacement that touched nothing.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct OutputSlotStats {
    /// Number of output slots written once replacement has finished.
    pub total_slots: usize,
    /// Pre-existing slots that kept their allocation.
    pub reused_slots: usize,
    /// Pre-existing slots that were replaced by moving in an oversized
    /// incoming allocation.
    pub moved_slots: usize,
    /// Slots appended past the previous length of the output vector.
    pub appended_slots: usize,
}
22
/// Byte-level counters reported by an output-buffer replacement.
///
/// Every field is a byte count; the all-zero [`Default`] value describes a
/// replacement that touched nothing.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct OutputSlotByteStats {
    /// Bytes carried by the incoming output buffers, measured before
    /// replacement.
    pub incoming_bytes: usize,
    /// Bytes copied into slots the caller already owned.
    pub copied_bytes: usize,
    /// Bytes put in place by swapping in an oversized incoming allocation.
    pub moved_bytes: usize,
    /// Bytes appended past the previous length of the output vector.
    pub appended_bytes: usize,
    /// Sum of the capacities of all output slots after replacement.
    pub retained_capacity_bytes: usize,
}
37
38/// Full output replacement accounting: slot decisions plus byte pressure.
39#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
40pub struct OutputReplacementStats {
41    /// Slot-level reuse/move/append accounting.
42    pub slots: OutputSlotStats,
43    /// Byte-level copy/move/append/capacity accounting.
44    pub bytes: OutputSlotByteStats,
45}
46
47/// Replace `outputs` with `incoming` while preserving already-allocated output
48/// slots whenever their positions still exist.
49pub fn replace_output_buffers_preserving_slots(
50    incoming: OutputBuffers,
51    outputs: &mut OutputBuffers,
52) {
53    let _ = replace_output_buffers_preserving_slots_with_stats(incoming, outputs);
54}
55
56/// Replace output buffers and return allocation-reuse accounting.
57pub fn replace_output_buffers_preserving_slots_with_stats(
58    incoming: OutputBuffers,
59    outputs: &mut OutputBuffers,
60) -> OutputSlotStats {
61    replace_output_buffers_preserving_slots_with_memory_stats(incoming, outputs).slots
62}
63
64/// Replace output buffers and return allocation-reuse plus byte-pressure
65/// accounting.
66pub fn replace_output_buffers_preserving_slots_with_memory_stats(
67    incoming: OutputBuffers,
68    outputs: &mut OutputBuffers,
69) -> OutputReplacementStats {
70    let total_slots = incoming.len();
71    let previous_slots = outputs.len();
72    reserve_output_slots_for_replacement(outputs, total_slots);
73    let mut incoming = incoming.into_iter();
74    let mut retained_slots = 0usize;
75    let mut reused_slots = 0usize;
76    let mut moved_slots = 0usize;
77    let mut incoming_bytes = 0usize;
78    let mut copied_bytes = 0usize;
79    let mut moved_bytes = 0usize;
80    let mut appended_bytes = 0usize;
81    for (slot, mut bytes) in outputs.iter_mut().zip(incoming.by_ref()) {
82        incoming_bytes = add_bytes(incoming_bytes, bytes.len(), "incoming output bytes");
83        if bytes.len() <= slot.capacity() {
84            slot.clear();
85            copied_bytes = add_bytes(copied_bytes, bytes.len(), "copied output bytes");
86            slot.extend_from_slice(&bytes);
87            reused_slots += 1;
88        } else {
89            moved_bytes = add_bytes(moved_bytes, bytes.len(), "moved output bytes");
90            std::mem::swap(slot, &mut bytes);
91            moved_slots += 1;
92        }
93        retained_slots += 1;
94    }
95    outputs.truncate(retained_slots);
96    for bytes in incoming {
97        incoming_bytes = add_bytes(incoming_bytes, bytes.len(), "incoming output bytes");
98        appended_bytes = add_bytes(appended_bytes, bytes.len(), "appended output bytes");
99        outputs.push(bytes);
100    }
101    let retained_capacity_bytes = outputs.iter().fold(0usize, |sum, output| {
102        add_bytes(sum, output.capacity(), "retained output capacity bytes")
103    });
104    OutputReplacementStats {
105        slots: OutputSlotStats {
106            total_slots,
107            reused_slots,
108            moved_slots,
109            appended_slots: total_slots.checked_sub(previous_slots).unwrap_or(0),
110        },
111        bytes: OutputSlotByteStats {
112            incoming_bytes,
113            copied_bytes,
114            moved_bytes,
115            appended_bytes,
116            retained_capacity_bytes,
117        },
118    }
119}
120
121fn reserve_output_slots_for_replacement(outputs: &mut OutputBuffers, total_slots: usize) {
122    crate::allocation::try_reserve_vec_to_capacity(outputs, total_slots).unwrap_or_else(|error| {
123            panic!(
124                "output replacement could not reserve {total_slots} output slot(s): {error}. Fix: split dispatch outputs before readback replacement."
125            )
126        });
127}
128
/// Add `incoming` to the running byte counter `current` and return the sum.
///
/// `label` names the counter being accumulated and is used only in the panic
/// message.
///
/// # Panics
///
/// Panics when the sum does not fit in `usize`; the counter is deliberately
/// not saturated.
fn add_bytes(current: usize, incoming: usize, label: &str) -> usize {
    match current.checked_add(incoming) {
        Some(total) => total,
        None => panic!(
            "{label} overflowed usize during output replacement accounting. Fix: split dispatch outputs before accumulating telemetry; silent saturation hides allocation pressure."
        ),
    }
}
136
137/// Output plus timing captured by a backend-owned dispatch path.
138///
139/// `wall_ns` is always populated by the shared default implementation.
140/// `device_ns` is populated only when a backend can measure elapsed device
141/// stream time without crossing the driver boundary.
142#[derive(Clone, Debug, Eq, PartialEq)]
143pub struct TimedDispatchResult {
144    /// Output buffers in the same order as [`crate::backend::VyreBackend::dispatch`].
145    pub outputs: OutputBuffers,
146    /// Host-observed dispatch duration.
147    pub wall_ns: u64,
148    /// Device-observed elapsed time when the backend exposes a timer.
149    pub device_ns: Option<u64>,
150    /// Host time spent enqueueing backend work before the caller begins
151    /// waiting for completion.
152    pub enqueue_ns: Option<u64>,
153    /// Host time spent waiting for completion and collecting output buffers.
154    pub wait_ns: Option<u64>,
155}
156
// Unit tests for the output-replacement helpers. Several of them compare raw
// pointer addresses before and after replacement to prove that an allocation
// was kept (or taken over) rather than reallocated.
#[cfg(test)]
mod tests {
    use super::*;

    // Same slot count, every incoming payload fits: the outer vector and both
    // inner allocations must stay at their original addresses.
    #[test]
    fn replace_output_buffers_preserves_existing_slots() {
        let mut outputs = vec![Vec::with_capacity(8), Vec::with_capacity(4)];
        let outputs_addr = outputs.as_ptr() as usize;
        let first_slot_addr = outputs[0].as_ptr() as usize;
        let second_slot_addr = outputs[1].as_ptr() as usize;

        replace_output_buffers_preserving_slots(vec![vec![1, 2], vec![3]], &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2], vec![3]]);
        assert_eq!(outputs.as_ptr() as usize, outputs_addr);
        assert_eq!(outputs[0].as_ptr() as usize, first_slot_addr);
        assert_eq!(outputs[1].as_ptr() as usize, second_slot_addr);
    }

    // Fewer incoming buffers than existing slots: the surplus slot is removed
    // while the surviving slot keeps its allocation.
    #[test]
    fn replace_output_buffers_truncates_without_dropping_reused_slots() {
        let mut outputs = vec![Vec::with_capacity(8), Vec::with_capacity(4)];
        let outputs_addr = outputs.as_ptr() as usize;
        let first_slot_addr = outputs[0].as_ptr() as usize;

        replace_output_buffers_preserving_slots(vec![vec![9]], &mut outputs);

        assert_eq!(outputs, vec![vec![9]]);
        assert_eq!(outputs.as_ptr() as usize, outputs_addr);
        assert_eq!(outputs[0].as_ptr() as usize, first_slot_addr);
    }

    // Incoming payload larger than the slot's capacity: the slot must end up
    // pointing at the incoming allocation itself.
    #[test]
    fn replace_output_buffers_moves_oversized_incoming_slot_without_copy() {
        let mut outputs = vec![Vec::with_capacity(1)];
        let incoming = vec![vec![1, 2, 3, 4]];
        let incoming_ptr = incoming[0].as_ptr() as usize;

        replace_output_buffers_preserving_slots(incoming, &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2, 3, 4]]);
        assert_eq!(
            outputs[0].as_ptr() as usize,
            incoming_ptr,
            "oversized incoming output should be moved into place instead of copied through a too-small retained slot"
        );
    }

    // One slot of each kind: slot 0 fits (reused), slot 1 is too small
    // (moved), slot 2 did not exist before (appended).
    #[test]
    fn replace_output_buffers_reports_reuse_move_and_append_stats() {
        let mut outputs = vec![Vec::with_capacity(8), Vec::with_capacity(1)];

        let stats = replace_output_buffers_preserving_slots_with_stats(
            vec![vec![1, 2], vec![3, 4], vec![5]],
            &mut outputs,
        );

        assert_eq!(outputs, vec![vec![1, 2], vec![3, 4], vec![5]]);
        assert_eq!(
            stats,
            OutputSlotStats {
                total_slots: 3,
                reused_slots: 1,
                moved_slots: 1,
                appended_slots: 1,
            }
        );
    }

    // The outer vector already has capacity for three slots, so appending two
    // new slots must not move it; the first slot is reused in place.
    #[test]
    fn replace_output_buffers_reserves_outer_slots_before_appending() {
        let mut outputs: OutputBuffers = Vec::with_capacity(3);
        outputs.push(Vec::with_capacity(4));
        outputs[0].extend_from_slice(&[0xaa]);
        let outer_ptr = outputs.as_ptr() as usize;
        let first_slot_ptr = outputs[0].as_ptr() as usize;

        let stats = replace_output_buffers_preserving_slots_with_memory_stats(
            vec![vec![1, 2], vec![3], vec![4, 5, 6]],
            &mut outputs,
        );

        assert_eq!(outputs, vec![vec![1, 2], vec![3], vec![4, 5, 6]]);
        assert_eq!(
            outputs.as_ptr() as usize,
            outer_ptr,
            "outer output vector had enough capacity and must not reallocate while appending new readback slots"
        );
        assert_eq!(
            outputs[0].as_ptr() as usize,
            first_slot_ptr,
            "first output slot should be reused because the incoming bytes fit its retained allocation"
        );
        // Appended payloads are `[3]` and `[4, 5, 6]`: two slots, four bytes.
        assert_eq!(stats.slots.appended_slots, 2);
        assert_eq!(stats.bytes.appended_bytes, 4);
    }

    // Byte counters for the reuse/move/append mix: 2 bytes copied into slot 0,
    // 2 bytes moved into slot 1, 1 byte appended as slot 2.
    #[test]
    fn replace_output_buffers_reports_byte_pressure_stats() {
        let mut outputs = vec![Vec::with_capacity(8), Vec::with_capacity(1)];

        let stats = replace_output_buffers_preserving_slots_with_memory_stats(
            vec![vec![1, 2], vec![3, 4], vec![5]],
            &mut outputs,
        );

        assert_eq!(outputs, vec![vec![1, 2], vec![3, 4], vec![5]]);
        // The expected retained capacity of 11 assumes exact-fit allocations:
        // 8 (kept slot 0) + 2 (moved-in `vec![3, 4]`) + 1 (appended `vec![5]`).
        assert_eq!(
            stats.bytes,
            OutputSlotByteStats {
                incoming_bytes: 5,
                copied_bytes: 2,
                moved_bytes: 2,
                appended_bytes: 1,
                retained_capacity_bytes: 11,
            }
        );
    }
}
275}