Skip to main content

fdars_core/explain_generic/
friedman.rs

1use crate::error::FdarError;
2use crate::explain::{make_grid, FriedmanHResult};
3use crate::matrix::FdMatrix;
4
5use super::FpcPredictor;
6
7/// Compute the 2D partial dependence surface over a grid of two components.
8///
9/// For each pair `(grid_j[gj], grid_k[gk])`, fixes components `component_j`
10/// and `component_k` to those grid values and averages the model prediction
11/// over all `n` observations.
12fn compute_pdp_grid_2d(
13    model: &dyn FpcPredictor,
14    scores: &FdMatrix,
15    grid_j: &[f64],
16    grid_k: &[f64],
17    component_j: usize,
18    component_k: usize,
19    ncomp: usize,
20    n: usize,
21    scalar_covariates: Option<&FdMatrix>,
22    p_scalar: usize,
23    n_grid: usize,
24) -> FdMatrix {
25    let mut pdp_2d = FdMatrix::zeros(n_grid, n_grid);
26    for (gj_idx, &gj) in grid_j.iter().enumerate() {
27        for (gk_idx, &gk) in grid_k.iter().enumerate() {
28            let mut sum = 0.0;
29            for i in 0..n {
30                let mut s: Vec<f64> = (0..ncomp).map(|c| scores[(i, c)]).collect();
31                s[component_j] = gj;
32                s[component_k] = gk;
33                let obs_z: Option<Vec<f64>> = if p_scalar > 0 {
34                    scalar_covariates.map(|sc| (0..p_scalar).map(|j| sc[(i, j)]).collect())
35                } else {
36                    None
37                };
38                sum += model.predict_from_scores(&s, obs_z.as_deref());
39            }
40            pdp_2d[(gj_idx, gk_idx)] = sum / n as f64;
41        }
42    }
43    pdp_2d
44}
45
46/// Compute the H-squared statistic from marginal and joint PDP surfaces.
47///
48/// First computes the mean prediction `f_bar` (averaging over all observations),
49/// then delegates to [`crate::explain::compute_h_squared`] for the actual
50/// interaction / total-variance ratio.
51fn compute_h_squared_from_pdps(
52    model: &dyn FpcPredictor,
53    scores: &FdMatrix,
54    pdp_2d: &FdMatrix,
55    pdp_j: &[f64],
56    pdp_k: &[f64],
57    ncomp: usize,
58    n: usize,
59    scalar_covariates: Option<&FdMatrix>,
60    p_scalar: usize,
61    n_grid: usize,
62) -> f64 {
63    let f_bar: f64 = (0..n)
64        .map(|i| {
65            let s: Vec<f64> = (0..ncomp).map(|c| scores[(i, c)]).collect();
66            let obs_z: Option<Vec<f64>> = if p_scalar > 0 {
67                scalar_covariates.map(|sc| (0..p_scalar).map(|j| sc[(i, j)]).collect())
68            } else {
69                None
70            };
71            model.predict_from_scores(&s, obs_z.as_deref())
72        })
73        .sum::<f64>()
74        / n as f64;
75    crate::explain::compute_h_squared(pdp_2d, pdp_j, pdp_k, f_bar, n_grid)
76}
77
78/// Generic Friedman H-statistic for interaction between two FPC components.
79///
80/// # Errors
81///
82/// Returns [`FdarError::InvalidParameter`] if `component_j == component_k`,
83/// `n_grid < 2`, or either component index is `>= ncomp`.
84/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its
85/// column count does not match the model.
86#[must_use = "expensive computation whose result should not be discarded"]
87pub fn generic_friedman_h(
88    model: &dyn FpcPredictor,
89    data: &FdMatrix,
90    scalar_covariates: Option<&FdMatrix>,
91    component_j: usize,
92    component_k: usize,
93    n_grid: usize,
94) -> Result<FriedmanHResult, FdarError> {
95    if component_j == component_k {
96        return Err(FdarError::InvalidParameter {
97            parameter: "component_j/component_k",
98            message: "component_j and component_k must differ".into(),
99        });
100    }
101    let (n, m) = data.shape();
102    let ncomp = model.ncomp();
103    if n == 0 {
104        return Err(FdarError::InvalidDimension {
105            parameter: "data",
106            expected: "n > 0".into(),
107            actual: "0 rows".into(),
108        });
109    }
110    if m != model.fpca_mean().len() {
111        return Err(FdarError::InvalidDimension {
112            parameter: "data columns",
113            expected: model.fpca_mean().len().to_string(),
114            actual: m.to_string(),
115        });
116    }
117    if n_grid < 2 {
118        return Err(FdarError::InvalidParameter {
119            parameter: "n_grid",
120            message: format!("n_grid must be >= 2, got {n_grid}"),
121        });
122    }
123    if component_j >= ncomp || component_k >= ncomp {
124        return Err(FdarError::InvalidParameter {
125            parameter: "component",
126            message: format!(
127                "component_j={component_j} or component_k={component_k} >= ncomp={ncomp}"
128            ),
129        });
130    }
131
132    let scores = model.project(data);
133    let grid_j = make_grid(&scores, component_j, n_grid);
134    let grid_k = make_grid(&scores, component_k, n_grid);
135    let p_scalar = scalar_covariates.map_or(0, crate::matrix::FdMatrix::ncols);
136
137    // Compute 1D PDPs via generic predict
138    let pdp_j: Vec<f64> = grid_j
139        .iter()
140        .map(|&gval| {
141            let mut sum = 0.0;
142            for i in 0..n {
143                let mut s: Vec<f64> = (0..ncomp).map(|c| scores[(i, c)]).collect();
144                s[component_j] = gval;
145                let obs_z: Option<Vec<f64>> = if p_scalar > 0 {
146                    scalar_covariates.map(|sc| (0..p_scalar).map(|j| sc[(i, j)]).collect())
147                } else {
148                    None
149                };
150                sum += model.predict_from_scores(&s, obs_z.as_deref());
151            }
152            sum / n as f64
153        })
154        .collect();
155
156    let pdp_k: Vec<f64> = grid_k
157        .iter()
158        .map(|&gval| {
159            let mut sum = 0.0;
160            for i in 0..n {
161                let mut s: Vec<f64> = (0..ncomp).map(|c| scores[(i, c)]).collect();
162                s[component_k] = gval;
163                let obs_z: Option<Vec<f64>> = if p_scalar > 0 {
164                    scalar_covariates.map(|sc| (0..p_scalar).map(|j| sc[(i, j)]).collect())
165                } else {
166                    None
167                };
168                sum += model.predict_from_scores(&s, obs_z.as_deref());
169            }
170            sum / n as f64
171        })
172        .collect();
173
174    let pdp_2d = compute_pdp_grid_2d(
175        model,
176        &scores,
177        &grid_j,
178        &grid_k,
179        component_j,
180        component_k,
181        ncomp,
182        n,
183        scalar_covariates,
184        p_scalar,
185        n_grid,
186    );
187
188    let h_squared = compute_h_squared_from_pdps(
189        model,
190        &scores,
191        &pdp_2d,
192        &pdp_j,
193        &pdp_k,
194        ncomp,
195        n,
196        scalar_covariates,
197        p_scalar,
198        n_grid,
199    );
200
201    Ok(FriedmanHResult {
202        component_j,
203        component_k,
204        h_squared,
205        grid_j,
206        grid_k,
207        pdp_2d,
208    })
209}