/// A list of raw output byte buffers; each inner `Vec<u8>` is one output slot.
pub type OutputBuffers = Vec<Vec<u8>>;
9
/// Per-slot outcome counts reported by
/// [`replace_output_buffers_preserving_slots_with_memory_stats`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct OutputSlotStats {
    /// Number of incoming buffers, i.e. the slot count after the replacement.
    pub total_slots: usize,
    /// Existing slots whose allocation was kept because the incoming bytes fit its capacity.
    pub reused_slots: usize,
    /// Existing slots that were too small and were replaced by the incoming buffer itself.
    pub moved_slots: usize,
    /// How many more incoming buffers there were than existing slots (zero when not growing).
    pub appended_slots: usize,
}
22
/// Byte-level accounting reported by
/// [`replace_output_buffers_preserving_slots_with_memory_stats`].
///
/// Every counter is accumulated with saturating addition.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct OutputSlotByteStats {
    /// Sum of the lengths of all incoming buffers.
    pub incoming_bytes: usize,
    /// Bytes copied into existing slots whose capacity was large enough.
    pub copied_bytes: usize,
    /// Bytes held by incoming buffers that replaced a too-small existing slot.
    pub moved_bytes: usize,
    /// Bytes held by incoming buffers that were pushed as new slots.
    pub appended_bytes: usize,
    /// Sum of the capacities of every output buffer after the replacement.
    pub retained_capacity_bytes: usize,
}
37
38#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
40pub struct OutputReplacementStats {
41 pub slots: OutputSlotStats,
43 pub bytes: OutputSlotByteStats,
45}
46
47pub fn replace_output_buffers_preserving_slots(
50 incoming: OutputBuffers,
51 outputs: &mut OutputBuffers,
52) {
53 let _ = replace_output_buffers_preserving_slots_with_stats(incoming, outputs);
54}
55
56pub fn replace_output_buffers_preserving_slots_with_stats(
58 incoming: OutputBuffers,
59 outputs: &mut OutputBuffers,
60) -> OutputSlotStats {
61 replace_output_buffers_preserving_slots_with_memory_stats(incoming, outputs).slots
62}
63
64pub fn replace_output_buffers_preserving_slots_with_memory_stats(
67 incoming: OutputBuffers,
68 outputs: &mut OutputBuffers,
69) -> OutputReplacementStats {
70 let total_slots = incoming.len();
71 let previous_slots = outputs.len();
72 reserve_output_slots_for_replacement(outputs, total_slots);
73 let mut incoming = incoming.into_iter();
74 let mut retained_slots = 0usize;
75 let mut reused_slots = 0usize;
76 let mut moved_slots = 0usize;
77 let mut incoming_bytes = 0usize;
78 let mut copied_bytes = 0usize;
79 let mut moved_bytes = 0usize;
80 let mut appended_bytes = 0usize;
81 for (slot, mut bytes) in outputs.iter_mut().zip(incoming.by_ref()) {
82 incoming_bytes = add_bytes(incoming_bytes, bytes.len(), "incoming output bytes");
83 if bytes.len() <= slot.capacity() {
84 slot.clear();
85 copied_bytes = add_bytes(copied_bytes, bytes.len(), "copied output bytes");
86 slot.extend_from_slice(&bytes);
87 reused_slots += 1;
88 } else {
89 moved_bytes = add_bytes(moved_bytes, bytes.len(), "moved output bytes");
90 std::mem::swap(slot, &mut bytes);
91 moved_slots += 1;
92 }
93 retained_slots += 1;
94 }
95 outputs.truncate(retained_slots);
96 for bytes in incoming {
97 incoming_bytes = add_bytes(incoming_bytes, bytes.len(), "incoming output bytes");
98 appended_bytes = add_bytes(appended_bytes, bytes.len(), "appended output bytes");
99 outputs.push(bytes);
100 }
101 let retained_capacity_bytes = outputs.iter().fold(0usize, |sum, output| {
102 add_bytes(sum, output.capacity(), "retained output capacity bytes")
103 });
104 OutputReplacementStats {
105 slots: OutputSlotStats {
106 total_slots,
107 reused_slots,
108 moved_slots,
109 appended_slots: total_slots.checked_sub(previous_slots).unwrap_or(0),
110 },
111 bytes: OutputSlotByteStats {
112 incoming_bytes,
113 copied_bytes,
114 moved_bytes,
115 appended_bytes,
116 retained_capacity_bytes,
117 },
118 }
119}
120
121fn reserve_output_slots_for_replacement(outputs: &mut OutputBuffers, total_slots: usize) {
122 let _ = crate::allocation::try_reserve_vec_to_capacity(outputs, total_slots);
123}
124
/// Returns `current + incoming`, clamped to `usize::MAX` instead of overflowing.
///
/// `_label` names the counter being accumulated; it is not used by the
/// computation.
fn add_bytes(current: usize, incoming: usize, _label: &str) -> usize {
    usize::saturating_add(current, incoming)
}
128
129#[derive(Clone, Debug, Eq, PartialEq)]
135pub struct TimedDispatchResult {
136 pub outputs: OutputBuffers,
138 pub wall_ns: u64,
140 pub device_ns: Option<u64>,
142 pub enqueue_ns: Option<u64>,
145 pub wait_ns: Option<u64>,
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Address of a buffer's backing storage; a changed address means the
    /// buffer was reallocated or replaced.
    fn storage_addr<T>(buffer: &[T]) -> usize {
        buffer.as_ptr() as usize
    }

    /// Slots that are large enough keep both the outer and inner allocations.
    #[test]
    fn replace_output_buffers_preserves_existing_slots() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(4)];
        let outer_before = storage_addr(&outputs);
        let slots_before = [storage_addr(&outputs[0]), storage_addr(&outputs[1])];

        replace_output_buffers_preserving_slots(vec![vec![1, 2], vec![3]], &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2], vec![3]]);
        assert_eq!(storage_addr(&outputs), outer_before);
        assert_eq!(storage_addr(&outputs[0]), slots_before[0]);
        assert_eq!(storage_addr(&outputs[1]), slots_before[1]);
    }

    /// Shrinking drops the surplus slot but leaves the surviving one in place.
    #[test]
    fn replace_output_buffers_truncates_without_dropping_reused_slots() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(4)];
        let outer_before = storage_addr(&outputs);
        let first_slot_before = storage_addr(&outputs[0]);

        replace_output_buffers_preserving_slots(vec![vec![9]], &mut outputs);

        assert_eq!(outputs, vec![vec![9]]);
        assert_eq!(storage_addr(&outputs), outer_before);
        assert_eq!(storage_addr(&outputs[0]), first_slot_before);
    }

    /// An incoming buffer larger than the slot is adopted as-is.
    #[test]
    fn replace_output_buffers_moves_oversized_incoming_slot_without_copy() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(1)];
        let incoming: OutputBuffers = vec![vec![1, 2, 3, 4]];
        let incoming_addr = storage_addr(&incoming[0]);

        replace_output_buffers_preserving_slots(incoming, &mut outputs);

        assert_eq!(outputs, vec![vec![1, 2, 3, 4]]);
        assert_eq!(
            storage_addr(&outputs[0]),
            incoming_addr,
            "oversized incoming output should be moved into place instead of copied through a too-small retained slot"
        );
    }

    /// One slot of each kind: reused, moved and appended.
    #[test]
    fn replace_output_buffers_reports_reuse_move_and_append_stats() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(1)];
        let incoming: OutputBuffers = vec![vec![1, 2], vec![3, 4], vec![5]];
        let expected_outputs = incoming.clone();
        let expected_stats = OutputSlotStats {
            total_slots: 3,
            reused_slots: 1,
            moved_slots: 1,
            appended_slots: 1,
        };

        let stats = replace_output_buffers_preserving_slots_with_stats(incoming, &mut outputs);

        assert_eq!(outputs, expected_outputs);
        assert_eq!(stats, expected_stats);
    }

    /// Appending into spare outer capacity must not move the outer vector.
    #[test]
    fn replace_output_buffers_reserves_outer_slots_before_appending() {
        let mut first_slot: Vec<u8> = Vec::with_capacity(4);
        first_slot.push(0xaa);
        let mut outputs: OutputBuffers = Vec::with_capacity(3);
        outputs.push(first_slot);
        let outer_before = storage_addr(&outputs);
        let first_slot_before = storage_addr(&outputs[0]);

        let stats = replace_output_buffers_preserving_slots_with_memory_stats(
            vec![vec![1, 2], vec![3], vec![4, 5, 6]],
            &mut outputs,
        );

        assert_eq!(outputs, vec![vec![1, 2], vec![3], vec![4, 5, 6]]);
        assert_eq!(
            storage_addr(&outputs),
            outer_before,
            "outer output vector had enough capacity and must not reallocate while appending new readback slots"
        );
        assert_eq!(
            storage_addr(&outputs[0]),
            first_slot_before,
            "first output slot should be reused because the incoming bytes fit its retained allocation"
        );
        assert_eq!(stats.slots.appended_slots, 2);
        assert_eq!(stats.bytes.appended_bytes, 4);
    }

    /// Byte counters for one reused, one moved and one appended slot.
    #[test]
    fn replace_output_buffers_reports_byte_pressure_stats() {
        let mut outputs: OutputBuffers = vec![Vec::with_capacity(8), Vec::with_capacity(1)];
        let incoming: OutputBuffers = vec![vec![1, 2], vec![3, 4], vec![5]];
        let expected_outputs = incoming.clone();
        // Retained capacity: 8 (reused slot) + 2 (moved buffer) + 1 (appended buffer).
        let expected_bytes = OutputSlotByteStats {
            incoming_bytes: 5,
            copied_bytes: 2,
            moved_bytes: 2,
            appended_bytes: 1,
            retained_capacity_bytes: 11,
        };

        let stats =
            replace_output_buffers_preserving_slots_with_memory_stats(incoming, &mut outputs);

        assert_eq!(outputs, expected_outputs);
        assert_eq!(stats.bytes, expected_bytes);
    }
}