Skip to main content

gam_problem/
execution_path.rs

1//! Execution-path telemetry for hot solver/GPU paths.
2
3use serde::{Deserialize, Serialize};
4
5/// Truthful execution-path classifier for a fit's hot inner solve (issue #1017).
6///
7/// This replaces the lying `used_device: bool` on GPU-owned result structs: a
8/// bare boolean could not distinguish "ran on the device with the constant
9/// Hessian factors kept resident across iterations" from "re-uploaded and
10/// re-factored every iterate" from "silently fell back to the CPU", so a
11/// device that quietly declined still reported `used_device = true` at some
12/// call sites. Each variant names exactly one of the four real backends the
13/// resident solver can take, so telemetry and tests assert the concrete path
14/// instead of a yes/no that hid the original silent-fallback bug.
15#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
16#[serde(rename_all = "kebab-case")]
17pub enum ExecutionPath {
18    /// Host-only arithmetic; no device work was performed.
19    #[default]
20    Cpu,
21    /// GPU path that re-uploads `D`/`B`/`g` and re-factors every iterate (the
22    /// pre-residency baseline).
23    GpuReupload,
24    /// GPU path that keeps the constant Hessian factors resident across
25    /// iterations and uploads only the per-iterate gradient, for a single
26    /// linearization (one frozen gate/basis frame).
27    GpuResidentLinearization,
28    /// Full device-resident inner Newton loop (the Phase-3 residency fix).
29    GpuResidentFull,
30}
31
32impl ExecutionPath {
33    /// Stable lowercase identifier for logs/telemetry dictionaries.
34    #[inline]
35    pub const fn as_str(self) -> &'static str {
36        match self {
37            Self::Cpu => "cpu",
38            Self::GpuReupload => "gpu-reupload",
39            Self::GpuResidentLinearization => "gpu-resident-linearization",
40            Self::GpuResidentFull => "gpu-resident-full",
41        }
42    }
43
44    /// True when any part of the path executed on the device.
45    #[inline]
46    pub const fn used_device(self) -> bool {
47        !matches!(self, Self::Cpu)
48    }
49}
50
#[cfg(test)]
mod tests {
    //! Unit tests pinning the telemetry labels, the device flag and the
    //! default variant of `ExecutionPath`.

    use super::ExecutionPath;

    /// The host-only path is labelled `"cpu"`.
    #[test]
    fn as_str_cpu() {
        let label = ExecutionPath::Cpu.as_str();
        assert_eq!(label, "cpu");
    }

    /// The re-upload GPU path is labelled `"gpu-reupload"`.
    #[test]
    fn as_str_gpu_reupload() {
        let label = ExecutionPath::GpuReupload.as_str();
        assert_eq!(label, "gpu-reupload");
    }

    /// The single-linearization resident path has its own label.
    #[test]
    fn as_str_gpu_resident_linearization() {
        let label = ExecutionPath::GpuResidentLinearization.as_str();
        assert_eq!(label, "gpu-resident-linearization");
    }

    /// The fully resident path is labelled `"gpu-resident-full"`.
    #[test]
    fn as_str_gpu_resident_full() {
        let label = ExecutionPath::GpuResidentFull.as_str();
        assert_eq!(label, "gpu-resident-full");
    }

    /// The CPU path never reports device usage.
    #[test]
    fn used_device_false_for_cpu() {
        let on_device = ExecutionPath::Cpu.used_device();
        assert!(!on_device);
    }

    /// Each of the three GPU variants reports device usage.
    #[test]
    fn used_device_true_for_all_gpu_variants() {
        let gpu_paths = [
            ExecutionPath::GpuReupload,
            ExecutionPath::GpuResidentLinearization,
            ExecutionPath::GpuResidentFull,
        ];
        for path in gpu_paths {
            assert!(path.used_device());
        }
    }

    /// `Default` yields the CPU path, which is not a device path.
    #[test]
    fn default_is_cpu() {
        let fallback = ExecutionPath::default();
        assert_eq!(fallback, ExecutionPath::Cpu);
        assert!(!fallback.used_device());
    }
}