gam_problem/execution_path.rs
1//! Execution-path telemetry for hot solver/GPU paths.
2
3use serde::{Deserialize, Serialize};
4
5/// Truthful execution-path classifier for a fit's hot inner solve (issue #1017).
6///
7/// This replaces the lying `used_device: bool` on GPU-owned result structs: a
8/// bare boolean could not distinguish "ran on the device with the constant
9/// Hessian factors kept resident across iterations" from "re-uploaded and
10/// re-factored every iterate" from "silently fell back to the CPU", so a
11/// device that quietly declined still reported `used_device = true` at some
12/// call sites. Each variant names exactly one of the four real backends the
13/// resident solver can take, so telemetry and tests assert the concrete path
14/// instead of a yes/no that hid the original silent-fallback bug.
15#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
16#[serde(rename_all = "kebab-case")]
17pub enum ExecutionPath {
18 /// Host-only arithmetic; no device work was performed.
19 #[default]
20 Cpu,
21 /// GPU path that re-uploads `D`/`B`/`g` and re-factors every iterate (the
22 /// pre-residency baseline).
23 GpuReupload,
24 /// GPU path that keeps the constant Hessian factors resident across
25 /// iterations and uploads only the per-iterate gradient, for a single
26 /// linearization (one frozen gate/basis frame).
27 GpuResidentLinearization,
28 /// Full device-resident inner Newton loop (the Phase-3 residency fix).
29 GpuResidentFull,
30}
31
32impl ExecutionPath {
33 /// Stable lowercase identifier for logs/telemetry dictionaries.
34 #[inline]
35 pub const fn as_str(self) -> &'static str {
36 match self {
37 Self::Cpu => "cpu",
38 Self::GpuReupload => "gpu-reupload",
39 Self::GpuResidentLinearization => "gpu-resident-linearization",
40 Self::GpuResidentFull => "gpu-resident-full",
41 }
42 }
43
44 /// True when any part of the path executed on the device.
45 #[inline]
46 pub const fn used_device(self) -> bool {
47 !matches!(self, Self::Cpu)
48 }
49}