Skip to main content

vyre_driver/backend/
dispatch_result.rs

//! Dispatch output payloads shared by every backend.
2
/// Raw readback bytes produced by a single dispatch.
///
/// Each inner vector holds the bytes read back from the GPU for one output
/// buffer slot. The outer vector follows the order of the Program's
/// `is_output: true` buffers. Decoding the bytes according to the Program's
/// output buffer declarations is left to the consumer.
pub type OutputBuffers = Vec<Vec<u8>>;
9
/// Per-slot accounting describing how an output-buffer replacement treated
/// each position of the output vector.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct OutputSlotStats {
    /// Number of output slots present once the replacement has finished.
    pub total_slots: usize,
    /// Pre-existing slots that kept their allocation and received a copy of
    /// the incoming bytes.
    pub reused_slots: usize,
    /// Pre-existing slots whose allocation was too small and was replaced by
    /// moving the incoming allocation into place.
    pub moved_slots: usize,
    /// Slots pushed past the end of the previous output vector.
    pub appended_slots: usize,
}
22
/// Byte-level accounting describing how much data an output-buffer
/// replacement copied, moved, appended, and left allocated.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct OutputSlotByteStats {
    /// Sum of the lengths of all incoming output buffers, measured before
    /// replacement.
    pub incoming_bytes: usize,
    /// Bytes copied into slots whose caller-owned allocation was retained.
    pub copied_bytes: usize,
    /// Bytes put in place by swapping in an oversized incoming allocation.
    pub moved_bytes: usize,
    /// Bytes belonging to slots appended past the previous output vector
    /// length.
    pub appended_bytes: usize,
    /// Combined capacity of every output slot once replacement has finished.
    pub retained_capacity_bytes: usize,
}
37
38/// Full output replacement accounting: slot decisions plus byte pressure.
39#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
40pub struct OutputReplacementStats {
41    /// Slot-level reuse/move/append accounting.
42    pub slots: OutputSlotStats,
43    /// Byte-level copy/move/append/capacity accounting.
44    pub bytes: OutputSlotByteStats,
45}
46
47/// Replace `outputs` with `incoming` while preserving already-allocated output
48/// slots whenever their positions still exist.
49pub fn replace_output_buffers_preserving_slots(
50    incoming: OutputBuffers,
51    outputs: &mut OutputBuffers,
52) {
53    let _ = replace_output_buffers_preserving_slots_with_stats(incoming, outputs);
54}
55
56/// Replace output buffers and return allocation-reuse accounting.
57pub fn replace_output_buffers_preserving_slots_with_stats(
58    incoming: OutputBuffers,
59    outputs: &mut OutputBuffers,
60) -> OutputSlotStats {
61    replace_output_buffers_preserving_slots_with_memory_stats(incoming, outputs).slots
62}
63
64/// Replace output buffers and return allocation-reuse plus byte-pressure
65/// accounting.
66pub fn replace_output_buffers_preserving_slots_with_memory_stats(
67    incoming: OutputBuffers,
68    outputs: &mut OutputBuffers,
69) -> OutputReplacementStats {
70    let total_slots = incoming.len();
71    let previous_slots = outputs.len();
72    reserve_output_slots_for_replacement(outputs, total_slots);
73    let mut incoming = incoming.into_iter();
74    let mut retained_slots = 0usize;
75    let mut reused_slots = 0usize;
76    let mut moved_slots = 0usize;
77    let mut incoming_bytes = 0usize;
78    let mut copied_bytes = 0usize;
79    let mut moved_bytes = 0usize;
80    let mut appended_bytes = 0usize;
81    for (slot, mut bytes) in outputs.iter_mut().zip(incoming.by_ref()) {
82        incoming_bytes = add_bytes(incoming_bytes, bytes.len(), "incoming output bytes");
83        if bytes.len() <= slot.capacity() {
84            slot.clear();
85            copied_bytes = add_bytes(copied_bytes, bytes.len(), "copied output bytes");
86            slot.extend_from_slice(&bytes);
87            reused_slots += 1;
88        } else {
89            moved_bytes = add_bytes(moved_bytes, bytes.len(), "moved output bytes");
90            std::mem::swap(slot, &mut bytes);
91            moved_slots += 1;
92        }
93        retained_slots += 1;
94    }
95    outputs.truncate(retained_slots);
96    for bytes in incoming {
97        incoming_bytes = add_bytes(incoming_bytes, bytes.len(), "incoming output bytes");
98        appended_bytes = add_bytes(appended_bytes, bytes.len(), "appended output bytes");
99        outputs.push(bytes);
100    }
101    let retained_capacity_bytes = outputs.iter().fold(0usize, |sum, output| {
102        add_bytes(sum, output.capacity(), "retained output capacity bytes")
103    });
104    OutputReplacementStats {
105        slots: OutputSlotStats {
106            total_slots,
107            reused_slots,
108            moved_slots,
109            appended_slots: total_slots.checked_sub(previous_slots).unwrap_or(0),
110        },
111        bytes: OutputSlotByteStats {
112            incoming_bytes,
113            copied_bytes,
114            moved_bytes,
115            appended_bytes,
116            retained_capacity_bytes,
117        },
118    }
119}
120
121fn reserve_output_slots_for_replacement(outputs: &mut OutputBuffers, total_slots: usize) {
122    let _ = crate::allocation::try_reserve_vec_to_capacity(outputs, total_slots);
123}
124
/// Add `incoming` to the running byte total `current`, clamping the result at
/// `usize::MAX` instead of overflowing.
///
/// `_label` names the counter being updated at the call site; the current
/// implementation does not use it.
fn add_bytes(current: usize, incoming: usize, _label: &str) -> usize {
    usize::saturating_add(current, incoming)
}
128
129/// Output plus timing captured by a backend-owned dispatch path.
130///
131/// `wall_ns` is always populated by the shared default implementation.
132/// `device_ns` is populated only when a backend can measure elapsed device
133/// stream time without crossing the driver boundary.
134#[derive(Clone, Debug, Eq, PartialEq)]
135pub struct TimedDispatchResult {
136    /// Output buffers in the same order as [`crate::backend::VyreBackend::dispatch`].
137    pub outputs: OutputBuffers,
138    /// Host-observed dispatch duration.
139    pub wall_ns: u64,
140    /// Device-observed elapsed time when the backend exposes a timer.
141    pub device_ns: Option<u64>,
142    /// Host time spent enqueueing backend work before the caller begins
143    /// waiting for completion.
144    pub enqueue_ns: Option<u64>,
145    /// Host time spent waiting for completion and collecting output buffers.
146    pub wait_ns: Option<u64>,
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Address of a slice's backing storage, used to detect reallocation.
    fn addr<T>(items: &[T]) -> usize {
        items.as_ptr() as usize
    }

    /// Incoming buffers that fit are copied into the existing allocations;
    /// neither the outer vector nor the slots move.
    #[test]
    fn replace_output_buffers_preserves_existing_slots() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(4)];
        let outer_before = addr(&outputs);
        let first_before = addr(&outputs[0]);
        let second_before = addr(&outputs[1]);

        replace_output_buffers_preserving_slots(vec![vec![1, 2], vec![3]], &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2], vec![3]]);
        assert_eq!(addr(&outputs), outer_before);
        assert_eq!(addr(&outputs[0]), first_before);
        assert_eq!(addr(&outputs[1]), second_before);
    }

    /// Fewer incoming buffers than slots: the tail is truncated while the
    /// surviving slot keeps its allocation.
    #[test]
    fn replace_output_buffers_truncates_without_dropping_reused_slots() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(4)];
        let outer_before = addr(&outputs);
        let first_before = addr(&outputs[0]);

        replace_output_buffers_preserving_slots(vec![vec![9]], &mut outputs);

        assert_eq!(outputs, vec![vec![9]]);
        assert_eq!(addr(&outputs), outer_before);
        assert_eq!(addr(&outputs[0]), first_before);
    }

    /// An incoming buffer larger than the slot capacity is swapped in, so the
    /// slot ends up pointing at the incoming allocation.
    #[test]
    fn replace_output_buffers_moves_oversized_incoming_slot_without_copy() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(1)];
        let incoming: OutputBuffers = vec![vec![1, 2, 3, 4]];
        let incoming_addr = addr(&incoming[0]);

        replace_output_buffers_preserving_slots(incoming, &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2, 3, 4]]);
        assert_eq!(
            addr(&outputs[0]),
            incoming_addr,
            "oversized incoming output should be moved into place instead of copied through a too-small retained slot"
        );
    }

    /// One slot reused, one moved, one appended.
    #[test]
    fn replace_output_buffers_reports_reuse_move_and_append_stats() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(1)];
        let incoming: OutputBuffers = vec![vec![1, 2], vec![3, 4], vec![5]];

        let stats = replace_output_buffers_preserving_slots_with_stats(incoming, &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2], vec![3, 4], vec![5]]);
        let expected = OutputSlotStats {
            total_slots: 3,
            reused_slots: 1,
            moved_slots: 1,
            appended_slots: 1,
        };
        assert_eq!(stats, expected);
    }

    /// With enough outer capacity already available, appending new slots must
    /// not reallocate the outer vector.
    #[test]
    fn replace_output_buffers_reserves_outer_slots_before_appending() {
        let mut outputs: OutputBuffers = Vec::with_capacity(3);
        let mut seeded = Vec::with_capacity(4);
        seeded.extend_from_slice(&[0xaa]);
        outputs.push(seeded);
        let outer_before = addr(&outputs);
        let first_before = addr(&outputs[0]);

        let incoming: OutputBuffers = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
        let stats = replace_output_buffers_preserving_slots_with_memory_stats(incoming, &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2], vec![3], vec![4, 5, 6]]);
        assert_eq!(
            addr(&outputs),
            outer_before,
            "outer output vector had enough capacity and must not reallocate while appending new readback slots"
        );
        assert_eq!(
            addr(&outputs[0]),
            first_before,
            "first output slot should be reused because the incoming bytes fit its retained allocation"
        );
        assert_eq!(stats.slots.appended_slots, 2);
        assert_eq!(stats.bytes.appended_bytes, 4);
    }

    /// Byte counters for a mixed reuse / move / append replacement.
    #[test]
    fn replace_output_buffers_reports_byte_pressure_stats() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(1)];
        let incoming: OutputBuffers = vec![vec![1, 2], vec![3, 4], vec![5]];

        let stats = replace_output_buffers_preserving_slots_with_memory_stats(incoming, &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2], vec![3, 4], vec![5]]);
        let expected = OutputSlotByteStats {
            incoming_bytes: 5,
            copied_bytes: 2,
            moved_bytes: 2,
            appended_bytes: 1,
            retained_capacity_bytes: 11,
        };
        assert_eq!(stats.bytes, expected);
    }
}