Skip to main content

vyre_driver/
param_inlining.rs

1//! D7 substrate: push-constant / tiny-param inlining policy.
2//!
3//! When a dispatch's per-launch params buffer is small enough, it can be
4//! inlined into backend launch metadata instead of allocating a uniform
5//! buffer, uploading bytes, binding, and synchronising. Avoiding that
6//! 4-step path saves microseconds per launch on short kernels and is
7//! a pure win when the params fit.
8//!
9//! This module owns the *decision*: given a backend's inline budget and
10//! a payload size, should the dispatcher inline? It does **not** own the
11//! per-backend mechanics; those live in the concrete drivers and consume
12//! this policy.
13
/// Inline-params policy for a single backend.
///
/// Holds the policy inputs that [`decide_param_inlining`] consults, so
/// backend-neutral runtime code can choose the inline path without knowing
/// which concrete backend it is talking to. Values are meant to be filled
/// in from live capability probes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ParamInliningPolicy {
    /// Largest payload, in bytes, that may travel inline. A value of `0`
    /// switches inlining off, so every payload takes the uniform-buffer
    /// path.
    pub max_inline_bytes: u32,
    /// Alignment, in bytes, that the inline payload size must be a
    /// multiple of. A size that is not a multiple of this value is only
    /// inlined when `allow_padding_to_align` is set.
    pub align_bytes: u32,
    /// When `true`, the dispatcher may round the payload size up to the
    /// next `align_bytes` multiple before inlining; the rounded size must
    /// still fit within `max_inline_bytes`. When `false`, only sizes that
    /// are already a multiple of `align_bytes` are inlined.
    pub allow_padding_to_align: bool,
}
32
33impl ParamInliningPolicy {
34    /// Conservative large-inline default: 3 KiB inline budget,
35    /// 4-byte alignment, padding allowed. Concrete drivers with larger
36    /// native launch-metadata budgets should override this from live
37    /// capability probes.
38    #[must_use]
39    pub const fn large_inline_default() -> Self {
40        Self {
41            max_inline_bytes: 3 * 1024,
42            align_bytes: 4,
43            allow_padding_to_align: true,
44        }
45    }
46
47    /// Conservative small-inline default: 128 B inline budget,
48    /// 4-byte alignment, padding allowed. Concrete drivers should
49    /// override this from live capability probes when more inline
50    /// launch metadata is available.
51    #[must_use]
52    pub const fn small_inline_default() -> Self {
53        Self {
54            max_inline_bytes: 128,
55            align_bytes: 4,
56            allow_padding_to_align: true,
57        }
58    }
59
60    /// Construct a policy that disables inlining. Useful for backends
61    /// whose probed limit is zero or for benchmark sweeps that need to
62    /// exclude the inline path.
63    #[must_use]
64    pub const fn disabled() -> Self {
65        Self {
66            max_inline_bytes: 0,
67            align_bytes: 4,
68            allow_padding_to_align: false,
69        }
70    }
71}
72
/// Outcome of [`decide_param_inlining`]: how the dispatcher should deliver
/// a param payload.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParamInliningDecision {
    /// The payload fits the inline budget; the dispatcher should pack it
    /// into launch args or push constants.
    Inline {
        /// Number of bytes to reserve for the inlined payload. Never
        /// smaller than the payload size; it may be larger when the size
        /// was rounded up to a multiple of `align_bytes`.
        padded_bytes: u32,
    },
    /// The payload cannot be inlined; the dispatcher must allocate a
    /// uniform buffer and bind it.
    UniformBuffer,
}
87
88impl ParamInliningDecision {
89    /// Whether this decision is the inline path (helper for predicates).
90    #[must_use]
91    pub fn is_inline(&self) -> bool {
92        matches!(self, Self::Inline { .. })
93    }
94}
95
96/// Decide how to deliver a `bytes_len`-byte param payload under
97/// `policy`. Returns [`ParamInliningDecision::UniformBuffer`] when the
98/// payload exceeds the inline budget (after optional padding) or when
99/// inlining is disabled.
100#[must_use]
101pub fn decide_param_inlining(bytes_len: u32, policy: ParamInliningPolicy) -> ParamInliningDecision {
102    if policy.max_inline_bytes == 0 {
103        return ParamInliningDecision::UniformBuffer;
104    }
105    if policy.align_bytes == 0 {
106        // Defensive  -  a zero alignment is meaningless; treat as
107        // uniform-buffer-only to avoid undefined-behaviour packing.
108        return ParamInliningDecision::UniformBuffer;
109    }
110
111    let needs_padding = bytes_len % policy.align_bytes != 0;
112    let padded_bytes = if needs_padding {
113        if !policy.allow_padding_to_align {
114            return ParamInliningDecision::UniformBuffer;
115        }
116        // Round up to the next align_bytes multiple exactly. Overflow
117        // cannot inline, because the padded payload is larger than any
118        // representable backend inline budget.
119        let remainder = bytes_len % policy.align_bytes;
120        let padding = policy.align_bytes - remainder;
121        let padded = u64::from(bytes_len) + u64::from(padding);
122        if padded > u64::from(policy.max_inline_bytes) {
123            return ParamInliningDecision::UniformBuffer;
124        }
125        padded as u32
126    } else {
127        bytes_len
128    };
129
130    if padded_bytes <= policy.max_inline_bytes {
131        ParamInliningDecision::Inline { padded_bytes }
132    } else {
133        ParamInliningDecision::UniformBuffer
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the fallback decision.
    const UNIFORM: ParamInliningDecision = ParamInliningDecision::UniformBuffer;

    /// Shorthand for an inline decision reserving `padded_bytes`.
    fn inline(padded_bytes: u32) -> ParamInliningDecision {
        ParamInliningDecision::Inline { padded_bytes }
    }

    /// Builds an ad-hoc policy from its three fields.
    fn custom_policy(
        max_inline_bytes: u32,
        align_bytes: u32,
        allow_padding_to_align: bool,
    ) -> ParamInliningPolicy {
        ParamInliningPolicy {
            max_inline_bytes,
            align_bytes,
            allow_padding_to_align,
        }
    }

    #[test]
    fn small_aligned_payload_inlines_under_large_inline_default() {
        let outcome = decide_param_inlining(64, ParamInliningPolicy::large_inline_default());
        assert_eq!(outcome, inline(64));
        assert!(outcome.is_inline());
    }

    #[test]
    fn payload_at_inline_ceiling_still_inlines() {
        // A payload of exactly 3 KiB equals the budget and must inline.
        let ceiling = 3 * 1024;
        let outcome = decide_param_inlining(ceiling, ParamInliningPolicy::large_inline_default());
        assert_eq!(outcome, inline(ceiling));
    }

    #[test]
    fn payload_above_inline_ceiling_falls_back_to_uniform() {
        // 3073 would pad to 3076, which is over the 3072-byte budget.
        let outcome =
            decide_param_inlining(3 * 1024 + 1, ParamInliningPolicy::large_inline_default());
        assert_eq!(outcome, UNIFORM);
    }

    #[test]
    fn unaligned_payload_pads_when_allowed() {
        // 17 rounds up to 20, the next multiple of 4.
        let outcome = decide_param_inlining(17, ParamInliningPolicy::large_inline_default());
        assert_eq!(outcome, inline(20));
    }

    #[test]
    fn unaligned_payload_falls_back_when_padding_disallowed() {
        let no_padding = custom_policy(64, 4, false);
        assert_eq!(decide_param_inlining(17, no_padding), UNIFORM);
    }

    #[test]
    fn padded_size_must_also_fit_under_ceiling() {
        let tight = custom_policy(16, 8, true);
        // 13 rounds up to 16, which fits exactly.
        assert_eq!(decide_param_inlining(13, tight), inline(16));
        // 17 would round up to 24, which exceeds 16.
        assert_eq!(decide_param_inlining(17, tight), UNIFORM);
    }

    #[test]
    fn disabled_policy_always_uses_uniform() {
        let off = ParamInliningPolicy::disabled();
        for bytes_len in [0, 8, 1024] {
            assert_eq!(decide_param_inlining(bytes_len, off), UNIFORM);
        }
    }

    #[test]
    fn small_inline_default_inlines_tiny_payloads_only() {
        let small = ParamInliningPolicy::small_inline_default();
        // 64 bytes is within the conservative 128-byte budget.
        assert_eq!(decide_param_inlining(64, small), inline(64));
        // 256 bytes is over that budget.
        assert_eq!(decide_param_inlining(256, small), UNIFORM);
    }

    #[test]
    fn zero_byte_payload_inlines_with_zero_padded_bytes() {
        // An empty payload is degenerate, but there is nothing to upload,
        // so spending a uniform buffer on it would be wasteful.
        let outcome = decide_param_inlining(0, ParamInliningPolicy::large_inline_default());
        assert_eq!(outcome, inline(0));
    }

    #[test]
    fn zero_align_policy_falls_back_safely() {
        // A zero alignment must not crash; the decision falls back to the
        // uniform buffer instead of attempting unsound packing.
        let zero_align = custom_policy(1024, 0, true);
        assert_eq!(decide_param_inlining(64, zero_align), UNIFORM);
    }

    #[test]
    fn adversarial_padding_overflow_cannot_inline() {
        // Rounding `u32::MAX - 1` up to a multiple of 256 does not fit in
        // `u32`, so it cannot fit any budget either.
        let huge = custom_policy(u32::MAX, 256, true);
        assert_eq!(decide_param_inlining(u32::MAX - 1, huge), UNIFORM);
    }

    #[test]
    fn source_has_no_saturating_padding_math() {
        // The needle is assembled with `concat!` so that this test's own
        // text does not contain it.
        let needle = concat!(".", "saturating_add");
        let source = include_str!("param_inlining.rs");
        assert!(
            !source.contains(needle),
            "param inlining cannot silently clamp launch-param padding"
        );
    }
}