Skip to main content

vyre_driver/
numeric.rs

1//! Backend-neutral numeric boundary conversions.
2//!
3//! Concrete GPU backends cross the same host/API boundaries: host sizes become
4//! API `u64`s, high-resolution timers become telemetry `u64`s, and device
5//! timestamp deltas arrive as rounded floating-point nanoseconds. This module is
6//! the single policy for those lossy or fallible conversions; backend crates add
7//! only the backend label that makes the diagnostic actionable.
8
9use std::time::Instant;
10
11use crate::BackendError;
12
/// Number of basis points that make up a whole: `10_000` bps equals 100%.
pub const BASIS_POINTS_DENOMINATOR: u32 = 10_000;
15
/// Numeric conversion policy bound to a single backend label.
///
/// A backend is expected to hold one constant of this type rather than
/// writing a local wrapper around every numeric helper. Backend-neutral
/// callers and tests can keep using the free functions in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BackendNumericPolicy {
    /// Backend label forwarded to every helper this policy delegates to.
    backend: &'static str,
}
25
26impl BackendNumericPolicy {
27    /// Create a numeric policy that annotates diagnostics with `backend`.
28    #[must_use]
29    pub const fn new(backend: &'static str) -> Self {
30        Self { backend }
31    }
32
33    /// Return the backend label used in numeric diagnostics.
34    #[must_use]
35    pub const fn backend(self) -> &'static str {
36        self.backend
37    }
38
39    /// Convert a host `usize` to a backend/API `u64`.
40    ///
41    /// # Errors
42    /// Returns [`BackendError::InvalidProgram`] when the value cannot fit in
43    /// the backend/API boundary type.
44    pub fn usize_to_u64(self, value: usize, label: &str) -> Result<u64, BackendError> {
45        usize_to_u64(value, label, self.backend)
46    }
47
48    /// Convert a wide counter to telemetry `u64`.
49    ///
50    /// # Errors
51    /// Returns [`BackendError::InvalidProgram`] when the counter does not fit in
52    /// telemetry storage.
53    pub fn u128_to_u64(self, value: u128, label: &str) -> Result<u64, BackendError> {
54        u128_to_u64(value, label, self.backend)
55    }
56
57    /// Convert elapsed wall-clock time to telemetry nanoseconds.
58    ///
59    /// # Errors
60    /// Returns [`BackendError::InvalidProgram`] when the elapsed nanoseconds
61    /// cannot fit in telemetry storage.
62    pub fn elapsed_nanos_u64(self, started: Instant, label: &str) -> Result<u64, BackendError> {
63        elapsed_nanos_u64(started, label, self.backend)
64    }
65
66    /// Round a finite floating-point nanosecond value into telemetry storage.
67    ///
68    /// # Errors
69    /// Returns [`BackendError::InvalidProgram`] when the rounded value is
70    /// negative, non-finite, or too large for telemetry storage.
71    pub fn rounded_f64_to_u64(self, value: f64, label: &str) -> Result<u64, BackendError> {
72        rounded_f64_to_u64(value, label, self.backend)
73    }
74
75    /// Compute `part / whole` as floor basis points in a `u32` telemetry domain.
76    #[must_use]
77    pub fn ratio_basis_points_u64(
78        self,
79        part: u64,
80        whole: u64,
81        denominator_zero_value: u32,
82        label: &str,
83    ) -> u32 {
84        ratio_basis_points_u64(part, whole, denominator_zero_value, label, self.backend)
85    }
86
87    /// Compute `part / whole` as floor basis points in a `u64` telemetry domain.
88    #[must_use]
89    pub fn ratio_basis_points_u64_wide(
90        self,
91        part: u64,
92        whole: u64,
93        denominator_zero_value: u64,
94        label: &str,
95    ) -> u64 {
96        ratio_basis_points_u64_wide(part, whole, denominator_zero_value, label, self.backend)
97    }
98
99    /// Compute `part / whole` as floor parts-per-million.
100    #[must_use]
101    pub fn ratio_parts_per_million_u64(
102        self,
103        part: u64,
104        whole: u64,
105        denominator_zero_value: u32,
106        label: &str,
107    ) -> u32 {
108        ratio_parts_per_million_u64(part, whole, denominator_zero_value, label, self.backend)
109    }
110
111    /// Compose two basis-point multipliers into a `u32` result.
112    #[must_use]
113    pub fn compose_basis_points_u32(self, left: u32, right: u32, label: &str) -> u32 {
114        compose_basis_points_u32(left, right, label, self.backend)
115    }
116
117    /// Apply rounded basis-point scaling with optional high clamp.
118    #[must_use]
119    pub fn scale_u64_by_basis_points_round_clamped(
120        self,
121        base: u64,
122        scale_bps: u32,
123        zero_scale_value: u64,
124        max_scale_bps: u32,
125        label: &str,
126    ) -> u64 {
127        scale_u64_by_basis_points_round_clamped(
128            base,
129            scale_bps,
130            zero_scale_value,
131            max_scale_bps,
132            label,
133            self.backend,
134        )
135    }
136
137    /// Apply floor basis-point scaling with a lower bound.
138    #[must_use]
139    pub fn scale_u64_by_basis_points_floor_min(
140        self,
141        base: u64,
142        scale_bps: u32,
143        min_value: u64,
144        label: &str,
145    ) -> u64 {
146        scale_u64_by_basis_points_floor_min(base, scale_bps, min_value, label, self.backend)
147    }
148
149    /// Convert finite non-negative floating-point telemetry to `u32` by truncation.
150    #[must_use]
151    pub fn finite_f64_to_u32_trunc(self, value: f64, label: &str) -> u32 {
152        finite_f64_to_u32_trunc(value, label, self.backend)
153    }
154
155    /// Convert finite non-negative floating-point telemetry to rounded `u32`.
156    #[must_use]
157    pub fn finite_f64_to_u32_round(self, value: f64, label: &str) -> u32 {
158        finite_f64_to_u32_round(value, label, self.backend)
159    }
160
161    /// Convert a finite floating-point ratio into floor basis points.
162    #[must_use]
163    pub fn finite_f64_ratio_basis_points_trunc(
164        self,
165        numerator: f64,
166        denominator: f64,
167        invalid_numerator_value: u32,
168        invalid_denominator_value: u32,
169        label: &str,
170    ) -> u32 {
171        finite_f64_ratio_basis_points_trunc(
172            numerator,
173            denominator,
174            invalid_numerator_value,
175            invalid_denominator_value,
176            label,
177            self.backend,
178        )
179    }
180
181    /// Convert a finite floating-point ratio into rounded basis points.
182    #[must_use]
183    pub fn finite_f64_ratio_basis_points_round(
184        self,
185        numerator: f64,
186        denominator: f64,
187        invalid_numerator_value: u32,
188        invalid_denominator_value: u32,
189        label: &str,
190    ) -> u32 {
191        finite_f64_ratio_basis_points_round(
192            numerator,
193            denominator,
194            invalid_numerator_value,
195            invalid_denominator_value,
196            label,
197            self.backend,
198        )
199    }
200
201    /// Convert a finite scalar where `1.0 == 10_000 bps` into floor basis points.
202    #[must_use]
203    pub fn finite_f64_unit_basis_points_trunc(
204        self,
205        value: f64,
206        invalid_value: u32,
207        label: &str,
208    ) -> u32 {
209        finite_f64_unit_basis_points_trunc(value, invalid_value, label, self.backend)
210    }
211
212    /// Compute `ceil(value / divisor)` in `u64`, returning `None` for zero
213    /// divisors or arithmetic overflow.
214    #[must_use]
215    pub fn checked_ceil_div_u64(self, value: u64, divisor: u64) -> Option<u64> {
216        checked_ceil_div_u64(value, divisor)
217    }
218
219    /// Multiply three `u32` launch dimensions into a `u64` without wraparound.
220    #[must_use]
221    pub fn checked_dim_product_u64(self, dims: [u32; 3]) -> Option<u64> {
222        checked_dim_product_u64(dims)
223    }
224
225    /// Multiply three `u32` launch dimensions into a `u32` without wraparound.
226    #[must_use]
227    pub fn checked_dim_product_u32(self, dims: [u32; 3]) -> Option<u32> {
228        checked_dim_product_u32(dims)
229    }
230
231    /// Align `value` upward to `alignment`, after applying `min_value`.
232    ///
233    /// # Errors
234    /// Returns [`BackendError::InvalidProgram`] when `alignment` is zero or the
235    /// padded value would overflow `u64`.
236    pub fn align_up_u64(
237        self,
238        value: u64,
239        alignment: u64,
240        min_value: u64,
241        label: &str,
242    ) -> Result<u64, BackendError> {
243        align_up_u64(value, alignment, min_value, label, self.backend)
244    }
245
246    /// Align `value` upward to `alignment`, after applying `min_value`.
247    ///
248    /// # Errors
249    /// Returns [`BackendError::InvalidProgram`] when `alignment` is zero or the
250    /// padded value would overflow `usize`.
251    pub fn align_up_usize(
252        self,
253        value: usize,
254        alignment: usize,
255        min_value: usize,
256        label: &str,
257    ) -> Result<usize, BackendError> {
258        align_up_usize(value, alignment, min_value, label, self.backend)
259    }
260}
261
262/// Convert a host `usize` to a backend/API `u64`.
263///
264/// # Errors
265/// Returns [`BackendError::InvalidProgram`] when the value cannot fit in the
266/// backend/API boundary type.
267pub fn usize_to_u64(value: usize, label: &str, backend: &str) -> Result<u64, BackendError> {
268    u64::try_from(value).map_err(|source| BackendError::InvalidProgram {
269        fix: format!(
270            "Fix: {backend} {label} cannot fit u64: {source}; split the workload before crossing the host/device boundary."
271        ),
272    })
273}
274
275/// Convert a wide counter to telemetry `u64`.
276///
277/// # Errors
278/// Returns [`BackendError::InvalidProgram`] when the counter does not fit in
279/// telemetry storage.
280pub fn u128_to_u64(value: u128, label: &str, backend: &str) -> Result<u64, BackendError> {
281    u64::try_from(value).map_err(|source| BackendError::InvalidProgram {
282        fix: format!(
283            "Fix: {backend} {label} cannot fit u64: {source}; split the dispatch before telemetry overflows."
284        ),
285    })
286}
287
288/// Convert elapsed wall-clock time to telemetry nanoseconds.
289///
290/// # Errors
291/// Returns [`BackendError::InvalidProgram`] when the elapsed nanoseconds cannot
292/// fit in telemetry storage.
293pub fn elapsed_nanos_u64(
294    started: Instant,
295    label: &str,
296    backend: &str,
297) -> Result<u64, BackendError> {
298    u128_to_u64(started.elapsed().as_nanos(), label, backend)
299}
300
301/// Round a finite floating-point nanosecond value into telemetry storage.
302///
303/// # Errors
304/// Returns [`BackendError::InvalidProgram`] when the rounded value is negative,
305/// non-finite, or too large for telemetry storage.
306pub fn rounded_f64_to_u64(value: f64, label: &str, backend: &str) -> Result<u64, BackendError> {
307    let rounded = value.round();
308    if !rounded.is_finite() || rounded < 0.0 || rounded > u64::MAX as f64 {
309        return Err(BackendError::InvalidProgram {
310            fix: format!(
311                "Fix: {backend} {label} value {value} cannot fit u64 after rounding; inspect device timing and split the dispatch before telemetry overflows."
312            ),
313        });
314    }
315    u64::try_from(rounded as u128).map_err(|source| BackendError::InvalidProgram {
316        fix: format!(
317            "Fix: {backend} {label} rounded value cannot fit u64: {source}; inspect device timing and split the dispatch before telemetry overflows."
318        ),
319    })
320}
321
322/// Compute `part / whole` as floor basis points with explicit zero-denominator
323/// policy and saturating telemetry overflow.
324///
325/// CUDA release-path planners use the same ratio encoding for memory pressure,
326/// readback savings, and device-side compaction. Keeping the arithmetic here
327/// prevents each backend module from carrying its own unchecked `as u32` cast.
328#[must_use]
329pub fn ratio_basis_points_u64(
330    part: u64,
331    whole: u64,
332    denominator_zero_value: u32,
333    label: &str,
334    backend: &str,
335) -> u32 {
336    let value = ratio_basis_points_u64_wide(
337        part,
338        whole,
339        u64::from(denominator_zero_value),
340        label,
341        backend,
342    );
343    if value > u64::from(u32::MAX) {
344        tracing::error!(
345            "{backend} {label} basis-points value exceeded u32. Fix: shard or normalize the telemetry domain before release-path planning."
346        );
347        return u32::MAX;
348    }
349    value as u32
350}
351
352/// Compute `part / whole` as floor basis points in a `u64` telemetry domain
353/// with explicit zero-denominator policy and loud overflow pinning.
354#[must_use]
355pub fn ratio_basis_points_u64_wide(
356    part: u64,
357    whole: u64,
358    denominator_zero_value: u64,
359    label: &str,
360    backend: &str,
361) -> u64 {
362    if whole == 0 {
363        return denominator_zero_value;
364    }
365    let value = (u128::from(part) * u128::from(BASIS_POINTS_DENOMINATOR)) / u128::from(whole);
366    if value > u128::from(u64::MAX) {
367        tracing::error!(
368            "{backend} {label} basis-points value exceeded u64. Fix: shard or normalize the telemetry domain before release-path planning."
369        );
370        return u64::MAX;
371    }
372    value as u64
373}
374
375/// Compute `part / whole` as floor parts-per-million with explicit
376/// zero-denominator policy and loud `u32` overflow pinning.
377#[must_use]
378pub fn ratio_parts_per_million_u64(
379    part: u64,
380    whole: u64,
381    denominator_zero_value: u32,
382    label: &str,
383    backend: &str,
384) -> u32 {
385    if whole == 0 {
386        return denominator_zero_value;
387    }
388    let value = (u128::from(part) * 1_000_000) / u128::from(whole);
389    if value > u128::from(u32::MAX) {
390        tracing::error!(
391            "{backend} {label} parts-per-million value exceeded u32. Fix: shard or normalize telemetry before release-path planning."
392        );
393        return u32::MAX;
394    }
395    value as u32
396}
397
398/// Compose two basis-point multipliers as `(left * right) / 10_000`, with
399/// widened arithmetic and loud `u32` overflow pinning.
400#[must_use]
401pub fn compose_basis_points_u32(left: u32, right: u32, label: &str, backend: &str) -> u32 {
402    let value = (u128::from(left) * u128::from(right)) / u128::from(BASIS_POINTS_DENOMINATOR);
403    if value > u128::from(u32::MAX) {
404        tracing::error!(
405            "{backend} {label} composed basis-points value exceeded u32. Fix: normalize chained multipliers before release-path planning."
406        );
407        return u32::MAX;
408    }
409    value as u32
410}
411
412/// Compose two basis-point multipliers as `(left * right) / 10_000`, returning
413/// `None` rather than saturating when the composed value cannot fit `u64`.
414#[must_use]
415pub fn checked_compose_basis_points_u64(left: u64, right: u64) -> Option<u64> {
416    let value = (u128::from(left) * u128::from(right)) / u128::from(BASIS_POINTS_DENOMINATOR);
417    u64::try_from(value).ok()
418}
419
420/// Apply a basis-point multiplier to a `u64` with nearest-integer rounding,
421/// optional high clamp, and explicit zero-scale policy.
422#[must_use]
423pub fn scale_u64_by_basis_points_round_clamped(
424    base: u64,
425    scale_bps: u32,
426    zero_scale_value: u64,
427    max_scale_bps: u32,
428    label: &str,
429    backend: &str,
430) -> u64 {
431    if scale_bps == 0 {
432        return zero_scale_value;
433    }
434    let clamped = if max_scale_bps == 0 {
435        scale_bps
436    } else {
437        scale_bps.min(max_scale_bps)
438    };
439    let value = (u128::from(base) * u128::from(clamped) + u128::from(BASIS_POINTS_DENOMINATOR / 2))
440        / u128::from(BASIS_POINTS_DENOMINATOR);
441    if value > u128::from(u64::MAX) {
442        tracing::error!(
443            "{backend} {label} rounded basis-point scaling exceeded u64. Fix: shard or normalize the cost domain before extraction."
444        );
445        return u64::MAX;
446    }
447    value as u64
448}
449
450/// Apply a basis-point multiplier to a `u64` with floor rounding and an output
451/// lower bound.
452#[must_use]
453
454pub fn scale_u64_by_basis_points_floor_min(
455    base: u64,
456    scale_bps: u32,
457    min_value: u64,
458    label: &str,
459    backend: &str,
460) -> u64 {
461    let value = (u128::from(base) * u128::from(scale_bps)) / u128::from(BASIS_POINTS_DENOMINATOR);
462    if value > u128::from(u64::MAX) {
463        tracing::error!(
464            "{backend} {label} floor basis-point scaling exceeded u64. Fix: shard or normalize the cost domain before extraction."
465        );
466        return u64::MAX;
467    }
468    (value as u64).max(min_value)
469}
470
471/// Weight a `u64` cost by basis points into a widened exact `u128` domain.
472#[must_use]
473pub fn weighted_u64_by_basis_points_u128(value: u64, basis_points: u32) -> u128 {
474    (u128::from(value) * u128::from(basis_points)) / u128::from(BASIS_POINTS_DENOMINATOR)
475}
476
477/// Convert a finite non-negative floating-point telemetry value to `u32` by
478/// truncating toward zero, with loud saturation on invalid or oversized input.
479#[must_use]
480pub fn finite_f64_to_u32_trunc(value: f64, label: &str, backend: &str) -> u32 {
481    if !value.is_finite() {
482        tracing::error!(
483            "{backend} {label} value {value} is not finite. Fix: normalize telemetry before release-path planning."
484        );
485        return u32::MAX;
486    }
487    if value <= 0.0 {
488        return 0;
489    }
490    if value > f64::from(u32::MAX) {
491        tracing::error!(
492            "{backend} {label} value {value} cannot fit u32. Fix: shard or normalize telemetry before release-path planning."
493        );
494        return u32::MAX;
495    }
496    value as u32
497}
498
499/// Convert a finite non-negative floating-point telemetry value to `u32` after
500/// rounding to the nearest integer, with loud saturation on invalid input.
501#[must_use]
502pub fn finite_f64_to_u32_round(value: f64, label: &str, backend: &str) -> u32 {
503    let rounded = value.round();
504    if !rounded.is_finite() {
505        tracing::error!(
506            "{backend} {label} rounded value {rounded} is not finite. Fix: normalize telemetry before release-path planning."
507        );
508        return u32::MAX;
509    }
510    if rounded <= 0.0 {
511        return 0;
512    }
513    if rounded > f64::from(u32::MAX) {
514        tracing::error!(
515            "{backend} {label} rounded value {rounded} cannot fit u32. Fix: shard or normalize telemetry before release-path planning."
516        );
517        return u32::MAX;
518    }
519    rounded as u32
520}
521
522/// Convert a finite floating-point ratio into floor basis points, with separate
523/// policies for invalid numerators and denominators.
524#[must_use]
525pub fn finite_f64_ratio_basis_points_trunc(
526    numerator: f64,
527    denominator: f64,
528    invalid_numerator_value: u32,
529    invalid_denominator_value: u32,
530    label: &str,
531    backend: &str,
532) -> u32 {
533    finite_f64_ratio_basis_points(
534        numerator,
535        denominator,
536        invalid_numerator_value,
537        invalid_denominator_value,
538        label,
539        backend,
540        finite_f64_to_u32_trunc,
541    )
542}
543
544/// Convert a finite floating-point ratio into rounded basis points, with
545/// separate policies for invalid numerators and denominators.
546#[must_use]
547pub fn finite_f64_ratio_basis_points_round(
548    numerator: f64,
549    denominator: f64,
550    invalid_numerator_value: u32,
551    invalid_denominator_value: u32,
552    label: &str,
553    backend: &str,
554) -> u32 {
555    finite_f64_ratio_basis_points(
556        numerator,
557        denominator,
558        invalid_numerator_value,
559        invalid_denominator_value,
560        label,
561        backend,
562        finite_f64_to_u32_round,
563    )
564}
565
566/// Convert a finite scalar where `1.0 == 10_000 bps` into floor basis points.
567#[must_use]
568pub fn finite_f64_unit_basis_points_trunc(
569    value: f64,
570    invalid_value: u32,
571    label: &str,
572    backend: &str,
573) -> u32 {
574    if !value.is_finite() {
575        tracing::error!(
576            "{backend} {label} value {value} is not finite. Fix: normalize telemetry before release-path planning."
577        );
578        return invalid_value;
579    }
580    finite_f64_to_u32_trunc(
581        value.max(0.0) * f64::from(BASIS_POINTS_DENOMINATOR),
582        label,
583        backend,
584    )
585}
586
587fn finite_f64_ratio_basis_points(
588    numerator: f64,
589    denominator: f64,
590    invalid_numerator_value: u32,
591    invalid_denominator_value: u32,
592    label: &str,
593    backend: &str,
594    convert: fn(f64, &str, &str) -> u32,
595) -> u32 {
596    if !numerator.is_finite() {
597        tracing::error!(
598            "{backend} {label} numerator {numerator} is not finite. Fix: record finite dispatch timing before release-path planning."
599        );
600        return invalid_numerator_value;
601    }
602    if !denominator.is_finite() || denominator <= 0.0 {
603        tracing::error!(
604            "{backend} {label} denominator {denominator} is not finite and positive. Fix: record finite dispatch timing before release-path planning."
605        );
606        return invalid_denominator_value;
607    }
608    if numerator <= 0.0 {
609        return 0;
610    }
611    convert(
612        (numerator / denominator) * f64::from(BASIS_POINTS_DENOMINATOR),
613        label,
614        backend,
615    )
616}
617
/// Compute `ceil(value / divisor)` in `u64`.
///
/// Returns `None` when `divisor` is zero. The final increment uses a checked
/// addition, so overflow would also surface as `None`.
#[must_use]
pub fn checked_ceil_div_u64(value: u64, divisor: u64) -> Option<u64> {
    if divisor == 0 {
        return None;
    }
    let quotient = value / divisor;
    // Round up by one whenever the division left a remainder.
    let has_remainder = value % divisor != 0;
    quotient.checked_add(u64::from(has_remainder))
}
630
/// Multiply the three `u32` dimensions in `dims` into a `u64`, returning
/// `None` instead of wrapping on overflow.
///
/// CUDA, WGPU, and runtime launch geometry all cross this host/device
/// boundary; sharing one primitive keeps the overflow policy for `[x, y, z]`
/// launch dimensions identical across backends.
#[must_use]
pub fn checked_dim_product_u64(dims: [u32; 3]) -> Option<u64> {
    dims.iter()
        .try_fold(1_u64, |product, &dim| product.checked_mul(u64::from(dim)))
}
642
643/// Multiply three `u32` dimensions into a `u32` without wraparound.
644#[must_use]
645pub fn checked_dim_product_u32(dims: [u32; 3]) -> Option<u32> {
646    u32::try_from(checked_dim_product_u64(dims)?).ok()
647}
648
649/// Align `value` upward to `alignment`, after applying `min_value`.
650///
651/// # Errors
652/// Returns [`BackendError::InvalidProgram`] when `alignment` is zero or the
653/// padded value would overflow `u64`.
654pub fn align_up_u64(
655    value: u64,
656    alignment: u64,
657    min_value: u64,
658    label: &str,
659    backend: &str,
660) -> Result<u64, BackendError> {
661    if alignment == 0 {
662        return Err(BackendError::InvalidProgram {
663            fix: format!("Fix: {backend} {label} alignment must be non-zero before padding."),
664        });
665    }
666    let normalized = value.max(min_value);
667    let remainder = normalized % alignment;
668    if remainder == 0 {
669        return Ok(normalized);
670    }
671    normalized
672        .checked_add(alignment - remainder)
673        .ok_or_else(|| BackendError::InvalidProgram {
674            fix: format!(
675                "Fix: {backend} {label} overflows u64 while padding to {alignment}-byte alignment; split the workload before crossing the host/device boundary."
676            ),
677        })
678}
679
680/// Align `value` upward to `alignment`, after applying `min_value`.
681///
682/// # Errors
683/// Returns [`BackendError::InvalidProgram`] when `alignment` is zero or the
684/// padded value would overflow `usize`.
685pub fn align_up_usize(
686    value: usize,
687    alignment: usize,
688    min_value: usize,
689    label: &str,
690    backend: &str,
691) -> Result<usize, BackendError> {
692    if alignment == 0 {
693        return Err(BackendError::InvalidProgram {
694            fix: format!("Fix: {backend} {label} alignment must be non-zero before padding."),
695        });
696    }
697    let normalized = value.max(min_value);
698    let remainder = normalized % alignment;
699    if remainder == 0 {
700        return Ok(normalized);
701    }
702    normalized
703        .checked_add(alignment - remainder)
704        .ok_or_else(|| BackendError::InvalidProgram {
705            fix: format!(
706                "Fix: {backend} {label} overflows usize while padding to {alignment}-byte alignment; split the workload before crossing the host/device boundary."
707            ),
708        })
709}
710
#[cfg(test)]
mod tests {
    //! Unit tests for the numeric boundary helpers. Each test pins one
    //! policy: happy-path values, zero-denominator handling, saturation on
    //! overflow, or the wording of a diagnostic.

    use super::*;

    #[test]
    fn usize_boundary_accepts_fit_values() {
        assert_eq!(usize_to_u64(17, "bytes", "test").unwrap(), 17);
    }

    // The policy methods must forward to the free functions and splice the
    // backend label into diagnostics.
    #[test]
    fn backend_numeric_policy_carries_backend_label_without_local_wrappers() {
        let policy = BackendNumericPolicy::new("CUDA");
        assert_eq!(policy.backend(), "CUDA");
        assert_eq!(policy.usize_to_u64(17, "bytes").unwrap(), 17);
        assert_eq!(policy.ratio_basis_points_u64(1, 4, 0, "pressure"), 2_500);
        assert_eq!(
            policy.finite_f64_ratio_basis_points_round(1.0, 6.0, 99, 77, "ratio"),
            1_667
        );
        assert_eq!(policy.checked_ceil_div_u64(65_537, 65_536), Some(2));
        assert_eq!(
            policy.checked_dim_product_u64([65_535, 2, 3]),
            Some(393_210)
        );
        assert_eq!(
            policy.checked_dim_product_u32([65_535, 2, 3]),
            Some(393_210)
        );

        let err = policy
            .u128_to_u64(u128::from(u64::MAX) + 1, "resident bytes")
            .unwrap_err();
        let rendered = err.to_string();
        assert!(
            rendered.contains("CUDA resident bytes"),
            "backend policy diagnostics must carry the backend label and boundary name: {rendered}"
        );
    }

    #[test]
    fn u128_boundary_rejects_overflow_with_backend_label() {
        let err = u128_to_u64(u128::from(u64::MAX) + 1, "counter", "test").unwrap_err();
        let rendered = err.to_string();
        assert!(
            rendered.contains("test counter"),
            "numeric boundary diagnostics must identify the backend and label: {rendered}"
        );
    }

    #[test]
    fn rounded_f64_rejects_non_finite_values() {
        let err = rounded_f64_to_u64(f64::NAN, "timestamp", "test").unwrap_err();
        let rendered = err.to_string();
        assert!(
            rendered.contains("timestamp"),
            "rounded timestamp diagnostics must include the failing label: {rendered}"
        );
    }

    #[test]
    fn ratio_basis_points_preserves_zero_denominator_policy() {
        assert_eq!(
            ratio_basis_points_u64(1, 0, u32::MAX, "pressure", "test"),
            u32::MAX
        );
        assert_eq!(ratio_basis_points_u64(0, 0, 0, "savings", "test"), 0);
    }

    #[test]
    fn ratio_basis_points_uses_wide_arithmetic_before_clamping() {
        assert_eq!(
            ratio_basis_points_u64(u64::MAX, u64::MAX / 2, 0, "wide", "test"),
            20_000
        );
        assert_eq!(
            ratio_basis_points_u64(u64::MAX, 1, 0, "overflow", "test"),
            u32::MAX
        );
    }

    #[test]
    fn wide_ratio_basis_points_retains_u64_telemetry_domain() {
        assert_eq!(ratio_basis_points_u64_wide(3, 2, 0, "wide", "test"), 15_000);
        assert_eq!(
            ratio_basis_points_u64_wide(u64::MAX, u64::MAX / 4, 0, "wide", "test"),
            40_000
        );
        assert_eq!(
            ratio_basis_points_u64_wide(u64::MAX, 1, 0, "overflow", "test"),
            u64::MAX
        );
    }

    #[test]
    fn finite_f64_to_u32_helpers_pin_invalid_values() {
        assert_eq!(finite_f64_to_u32_trunc(12.9, "value", "test"), 12);
        assert_eq!(finite_f64_to_u32_round(12.5, "value", "test"), 13);
        assert_eq!(finite_f64_to_u32_trunc(-1.0, "value", "test"), 0);
        assert_eq!(
            finite_f64_to_u32_round(f64::INFINITY, "value", "test"),
            u32::MAX
        );
        assert_eq!(
            finite_f64_to_u32_trunc(f64::from(u32::MAX) * 2.0, "value", "test"),
            u32::MAX
        );
    }

    // 99 and 77 are sentinel policy values so each invalid-input branch is
    // distinguishable in the assertions.
    #[test]
    fn finite_f64_basis_point_helpers_pin_invalid_policies() {
        assert_eq!(
            finite_f64_ratio_basis_points_trunc(1.0, 4.0, 99, 77, "ratio", "test"),
            2_500
        );
        assert_eq!(
            finite_f64_ratio_basis_points_round(1.0, 6.0, 99, 77, "ratio", "test"),
            1_667
        );
        assert_eq!(
            finite_f64_ratio_basis_points_trunc(f64::NAN, 1.0, 99, 77, "ratio", "test"),
            99
        );
        assert_eq!(
            finite_f64_ratio_basis_points_trunc(1.0, 0.0, 99, 77, "ratio", "test"),
            77
        );
        assert_eq!(
            finite_f64_ratio_basis_points_round(-1.0, 1.0, 99, 77, "ratio", "test"),
            0
        );
        assert_eq!(
            finite_f64_unit_basis_points_trunc(0.25, 33, "unit", "test"),
            2_500
        );
        assert_eq!(
            finite_f64_unit_basis_points_trunc(f64::INFINITY, 33, "unit", "test"),
            33
        );
    }

    #[test]
    fn alignment_helpers_pad_minimums_and_reject_overflow() {
        assert_eq!(align_up_u64(0, 4, 4, "copy", "test").unwrap(), 4);
        assert_eq!(align_up_u64(5, 4, 0, "copy", "test").unwrap(), 8);
        assert_eq!(align_up_usize(0, 4, 4, "copy", "test").unwrap(), 4);
        assert_eq!(align_up_usize(5, 4, 0, "copy", "test").unwrap(), 8);

        let zero_alignment = align_up_u64(1, 0, 0, "copy", "test").unwrap_err();
        assert!(
            zero_alignment
                .to_string()
                .contains("alignment must be non-zero"),
            "zero-alignment diagnostics must be actionable: {zero_alignment}"
        );

        let overflow_u64 = align_up_u64(u64::MAX, 4, 0, "copy", "test").unwrap_err();
        assert!(
            overflow_u64.to_string().contains("overflows u64"),
            "u64 alignment overflow diagnostics must name the target type: {overflow_u64}"
        );

        let overflow_usize = align_up_usize(usize::MAX, 4, 0, "copy", "test").unwrap_err();
        assert!(
            overflow_usize.to_string().contains("overflows usize"),
            "usize alignment overflow diagnostics must name the target type: {overflow_usize}"
        );
    }

    #[test]
    fn checked_ceil_div_u64_handles_cuda_queue_boundaries() {
        assert_eq!(checked_ceil_div_u64(0, 64), Some(0));
        assert_eq!(checked_ceil_div_u64(1, 64), Some(1));
        assert_eq!(checked_ceil_div_u64(65_537, 65_536), Some(2));
        assert_eq!(
            checked_ceil_div_u64(u64::MAX, 65_536),
            Some(281_474_976_710_656)
        );
        assert_eq!(checked_ceil_div_u64(u64::MAX, 1), Some(u64::MAX));
        assert_eq!(checked_ceil_div_u64(1, 0), None);
    }

    #[test]
    fn checked_dim_product_helpers_cover_cuda_launch_boundaries() {
        assert_eq!(checked_dim_product_u64([1, 1, 1]), Some(1));
        assert_eq!(checked_dim_product_u64([0, 999, 999]), Some(0));
        assert_eq!(checked_dim_product_u64([65_535, 2, 3]), Some(393_210));
        assert_eq!(checked_dim_product_u32([65_535, 2, 3]), Some(393_210));
        assert_eq!(
            checked_dim_product_u64([u32::MAX, u32::MAX, u32::MAX]),
            None
        );
        assert_eq!(checked_dim_product_u32([u32::MAX, 2, 1]), None);
    }

    // Exhaustive cross-check against an exact `u128` product, so the checked
    // helpers cannot disagree with wide integer arithmetic on any corner.
    #[test]
    fn generated_dim_product_matrix_matches_wide_integer_reference() {
        const VALUES: [u32; 9] = [0, 1, 2, 3, 7, 32, 255, 65_535, u32::MAX];
        for x in VALUES {
            for y in VALUES {
                for z in VALUES {
                    let wide = u128::from(x) * u128::from(y) * u128::from(z);
                    let expected_u64 = u64::try_from(wide).ok();
                    let expected_u32 = u32::try_from(wide).ok();
                    assert_eq!(checked_dim_product_u64([x, y, z]), expected_u64);
                    assert_eq!(checked_dim_product_u32([x, y, z]), expected_u32);
                }
            }
        }
    }

    #[test]
    fn ratio_parts_per_million_uses_wide_arithmetic_and_pins_overflow() {
        assert_eq!(
            ratio_parts_per_million_u64(1, 4, 0, "commit-rate", "test"),
            250_000
        );
        assert_eq!(
            ratio_parts_per_million_u64(1, 0, 7, "commit-rate", "test"),
            7
        );
        assert_eq!(
            ratio_parts_per_million_u64(u64::MAX, 1, 0, "commit-rate", "test"),
            u32::MAX
        );
    }

    #[test]
    fn basis_point_composition_and_scaling_helpers_are_widened() {
        assert_eq!(
            compose_basis_points_u32(15_000, 2_500, "compose", "test"),
            3_750
        );
        assert_eq!(
            compose_basis_points_u32(u32::MAX, u32::MAX, "compose", "test"),
            u32::MAX
        );
        assert_eq!(
            checked_compose_basis_points_u64(50_000, 20_000),
            Some(100_000)
        );
        assert_eq!(checked_compose_basis_points_u64(u64::MAX, u64::MAX), None);
        assert_eq!(
            scale_u64_by_basis_points_round_clamped(10, 1_000_000, 10, 40_000, "scale", "test"),
            40
        );
        assert_eq!(
            scale_u64_by_basis_points_round_clamped(7, 0, 7, 40_000, "scale", "test"),
            7
        );
        assert_eq!(
            scale_u64_by_basis_points_floor_min(1, 1, 1, "scale", "test"),
            1
        );
        assert_eq!(
            weighted_u64_by_basis_points_u128(u64::MAX, 10_000),
            u128::from(u64::MAX)
        );
    }

    // Success path and range edges of the rounding conversion. 2^64 is the
    // first value past `u64::MAX` and must be rejected.
    #[test]
    fn rounded_f64_accepts_in_range_values_and_rejects_range_edges() {
        assert_eq!(rounded_f64_to_u64(12.4, "timestamp", "test").unwrap(), 12);
        assert_eq!(rounded_f64_to_u64(12.5, "timestamp", "test").unwrap(), 13);
        assert_eq!(rounded_f64_to_u64(-0.4, "timestamp", "test").unwrap(), 0);
        assert!(rounded_f64_to_u64(-0.5, "timestamp", "test").is_err());
        assert!(rounded_f64_to_u64(f64::INFINITY, "timestamp", "test").is_err());
        assert!(
            rounded_f64_to_u64(18_446_744_073_709_551_616.0, "timestamp", "test").is_err()
        );
    }

    // Nearest rounding, the clamp-disabled path (`max_scale_bps == 0`), and
    // overflow pinning of the rounded scaler.
    #[test]
    fn scale_round_clamped_rounds_to_nearest_and_honors_disabled_clamp() {
        assert_eq!(
            scale_u64_by_basis_points_round_clamped(3, 5_000, 0, 0, "scale", "test"),
            2
        );
        assert_eq!(
            scale_u64_by_basis_points_round_clamped(1, 4_999, 0, 0, "scale", "test"),
            0
        );
        assert_eq!(
            scale_u64_by_basis_points_round_clamped(10, 1_000_000, 10, 0, "scale", "test"),
            1_000
        );
        assert_eq!(
            scale_u64_by_basis_points_round_clamped(u64::MAX, u32::MAX, 0, 0, "scale", "test"),
            u64::MAX
        );
    }

    #[test]
    fn scale_floor_min_floors_applies_minimum_and_pins_overflow() {
        assert_eq!(
            scale_u64_by_basis_points_floor_min(10, 2_500, 0, "scale", "test"),
            2
        );
        assert_eq!(
            scale_u64_by_basis_points_floor_min(10, 2_500, 5, "scale", "test"),
            5
        );
        assert_eq!(
            scale_u64_by_basis_points_floor_min(u64::MAX, u32::MAX, 0, "scale", "test"),
            u64::MAX
        );
    }

    #[test]
    fn elapsed_nanos_fits_telemetry_for_a_fresh_instant() {
        let started = std::time::Instant::now();
        assert!(elapsed_nanos_u64(started, "dispatch", "test").is_ok());
    }
}