Skip to main content

fdars_core/explain/
pdp.rs

1//! PDP/ICE and beta decomposition.
2
3use super::helpers::*;
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{sigmoid, FregreLmResult, FunctionalLogisticResult};
7
8/// Result of a functional partial dependence plot.
9#[derive(Debug, Clone, PartialEq)]
10pub struct FunctionalPdpResult {
11    /// FPC score grid values (length n_grid).
12    pub grid_values: Vec<f64>,
13    /// Average prediction across observations at each grid point (length n_grid).
14    pub pdp_curve: Vec<f64>,
15    /// Individual conditional expectation curves (n x n_grid).
16    pub ice_curves: FdMatrix,
17    /// Which FPC component was varied.
18    pub component: usize,
19}
20
21/// Functional PDP/ICE for a linear functional regression model.
22///
23/// Varies the FPC score for `component` across a grid while keeping other scores
24/// fixed, producing ICE curves and their average (PDP).
25///
26/// For a linear model, ICE curves are parallel lines (same slope, different intercepts).
27///
28/// # Arguments
29/// * `fit` -- A fitted [`FregreLmResult`]
30/// * `data` -- Original functional predictor matrix (n x m)
31/// * `scalar_covariates` -- Optional scalar covariates (n x p)
32/// * `component` -- Which FPC component to vary (0-indexed, must be < fit.ncomp)
33/// * `n_grid` -- Number of grid points (must be >= 2)
34///
35/// # Errors
36///
37/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows, its column
38/// count does not match `fit.fpca.mean`, or its row count does not match
39/// `fit.fitted_values`.
40/// Returns [`FdarError::InvalidParameter`] if `component >= fit.ncomp` or
41/// `n_grid < 2`.
42#[must_use = "expensive computation whose result should not be discarded"]
43pub fn functional_pdp(
44    fit: &FregreLmResult,
45    data: &FdMatrix,
46    _scalar_covariates: Option<&FdMatrix>,
47    component: usize,
48    n_grid: usize,
49) -> Result<FunctionalPdpResult, FdarError> {
50    let (n, m) = data.shape();
51    if n == 0 {
52        return Err(FdarError::InvalidDimension {
53            parameter: "data",
54            expected: ">0 rows".into(),
55            actual: "0".into(),
56        });
57    }
58    if m != fit.fpca.mean.len() {
59        return Err(FdarError::InvalidDimension {
60            parameter: "data",
61            expected: format!("{} columns", fit.fpca.mean.len()),
62            actual: format!("{m}"),
63        });
64    }
65    if n != fit.fitted_values.len() {
66        return Err(FdarError::InvalidDimension {
67            parameter: "data",
68            expected: format!("{} rows matching fitted_values", fit.fitted_values.len()),
69            actual: format!("{n}"),
70        });
71    }
72    if component >= fit.ncomp {
73        return Err(FdarError::InvalidParameter {
74            parameter: "component",
75            message: format!("component {} >= ncomp {}", component, fit.ncomp),
76        });
77    }
78    if n_grid < 2 {
79        return Err(FdarError::InvalidParameter {
80            parameter: "n_grid",
81            message: "must be >= 2".into(),
82        });
83    }
84
85    let ncomp = fit.ncomp;
86    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
87    let grid_values = make_grid(&scores, component, n_grid);
88
89    let coef_c = fit.coefficients[1 + component];
90    let mut ice_curves = FdMatrix::zeros(n, n_grid);
91    for i in 0..n {
92        let base = fit.fitted_values[i] - coef_c * scores[(i, component)];
93        for g in 0..n_grid {
94            ice_curves[(i, g)] = base + coef_c * grid_values[g];
95        }
96    }
97
98    let pdp_curve = ice_to_pdp(&ice_curves, n, n_grid);
99
100    Ok(FunctionalPdpResult {
101        grid_values,
102        pdp_curve,
103        ice_curves,
104        component,
105    })
106}
107
108/// Functional PDP/ICE for a functional logistic regression model.
109///
110/// Predictions pass through sigmoid, so ICE curves are non-parallel.
111///
112/// # Arguments
113/// * `fit` -- A fitted [`FunctionalLogisticResult`]
114/// * `data` -- Original functional predictor matrix (n x m)
115/// * `scalar_covariates` -- Optional scalar covariates (n x p)
116/// * `component` -- Which FPC component to vary (0-indexed, must be < fit.ncomp)
117/// * `n_grid` -- Number of grid points (must be >= 2)
118///
119/// # Errors
120///
121/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
122/// count does not match `fit.fpca.mean`.
123/// Returns [`FdarError::InvalidParameter`] if `component >= fit.ncomp`,
124/// `n_grid < 2`, or `scalar_covariates` is `None` when the model has scalar
125/// covariates.
126#[must_use = "expensive computation whose result should not be discarded"]
127pub fn functional_pdp_logistic(
128    fit: &FunctionalLogisticResult,
129    data: &FdMatrix,
130    scalar_covariates: Option<&FdMatrix>,
131    component: usize,
132    n_grid: usize,
133) -> Result<FunctionalPdpResult, FdarError> {
134    let (n, m) = data.shape();
135    if n == 0 {
136        return Err(FdarError::InvalidDimension {
137            parameter: "data",
138            expected: ">0 rows".into(),
139            actual: "0".into(),
140        });
141    }
142    if m != fit.fpca.mean.len() {
143        return Err(FdarError::InvalidDimension {
144            parameter: "data",
145            expected: format!("{} columns", fit.fpca.mean.len()),
146            actual: format!("{m}"),
147        });
148    }
149    if component >= fit.ncomp {
150        return Err(FdarError::InvalidParameter {
151            parameter: "component",
152            message: format!("component {} >= ncomp {}", component, fit.ncomp),
153        });
154    }
155    if n_grid < 2 {
156        return Err(FdarError::InvalidParameter {
157            parameter: "n_grid",
158            message: "must be >= 2".into(),
159        });
160    }
161
162    let ncomp = fit.ncomp;
163    let p_scalar = fit.gamma.len();
164    if p_scalar > 0 && scalar_covariates.is_none() {
165        return Err(FdarError::InvalidParameter {
166            parameter: "scalar_covariates",
167            message: "required when model has scalar covariates".into(),
168        });
169    }
170
171    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
172    let grid_values = make_grid(&scores, component, n_grid);
173
174    let mut ice_curves = FdMatrix::zeros(n, n_grid);
175    let coef_c = fit.coefficients[1 + component];
176    for i in 0..n {
177        let eta_base = logistic_eta_base(
178            fit.intercept,
179            &fit.coefficients,
180            &fit.gamma,
181            &scores,
182            scalar_covariates,
183            i,
184            ncomp,
185            component,
186        );
187        for g in 0..n_grid {
188            ice_curves[(i, g)] = sigmoid(eta_base + coef_c * grid_values[g]);
189        }
190    }
191
192    let pdp_curve = ice_to_pdp(&ice_curves, n, n_grid);
193
194    Ok(FunctionalPdpResult {
195        grid_values,
196        pdp_curve,
197        ice_curves,
198        component,
199    })
200}
201
202// ---------------------------------------------------------------------------
203// Beta decomposition
204// ---------------------------------------------------------------------------
205
206/// Per-FPC decomposition of the functional coefficient beta(t).
207#[derive(Debug, Clone, PartialEq)]
208pub struct BetaDecomposition {
209    /// `components[k]` = coef_k * phi_k(t), each of length m.
210    pub components: Vec<Vec<f64>>,
211    /// FPC regression coefficients (length ncomp).
212    pub coefficients: Vec<f64>,
213    /// Proportion of ||beta(t)||^2 explained by each component.
214    pub variance_proportion: Vec<f64>,
215}
216
217/// Decompose beta(t) = sum_k coef_k * phi_k(t) for a linear functional regression.
218///
219/// # Errors
220///
221/// Returns [`FdarError::InvalidParameter`] if `fit.ncomp` is zero.
222/// Returns [`FdarError::InvalidDimension`] if `fit.fpca.mean` has zero length.
223#[must_use = "expensive computation whose result should not be discarded"]
224pub fn beta_decomposition(fit: &FregreLmResult) -> Result<BetaDecomposition, FdarError> {
225    let ncomp = fit.ncomp;
226    let m = fit.fpca.mean.len();
227    if ncomp == 0 {
228        return Err(FdarError::InvalidParameter {
229            parameter: "ncomp",
230            message: "must be > 0".into(),
231        });
232    }
233    if m == 0 {
234        return Err(FdarError::InvalidDimension {
235            parameter: "fpca.mean",
236            expected: ">0 length".into(),
237            actual: "0".into(),
238        });
239    }
240    decompose_beta(&fit.coefficients, &fit.fpca.rotation, ncomp, m)
241}
242
243/// Decompose beta(t) for a functional logistic regression.
244///
245/// # Errors
246///
247/// Returns [`FdarError::InvalidParameter`] if `fit.ncomp` is zero.
248/// Returns [`FdarError::InvalidDimension`] if `fit.fpca.mean` has zero length.
249#[must_use = "expensive computation whose result should not be discarded"]
250pub fn beta_decomposition_logistic(
251    fit: &FunctionalLogisticResult,
252) -> Result<BetaDecomposition, FdarError> {
253    let ncomp = fit.ncomp;
254    let m = fit.fpca.mean.len();
255    if ncomp == 0 {
256        return Err(FdarError::InvalidParameter {
257            parameter: "ncomp",
258            message: "must be > 0".into(),
259        });
260    }
261    if m == 0 {
262        return Err(FdarError::InvalidDimension {
263            parameter: "fpca.mean",
264            expected: ">0 length".into(),
265            actual: "0".into(),
266        });
267    }
268    decompose_beta(&fit.coefficients, &fit.fpca.rotation, ncomp, m)
269}
270
271fn decompose_beta(
272    coefficients: &[f64],
273    rotation: &FdMatrix,
274    ncomp: usize,
275    m: usize,
276) -> Result<BetaDecomposition, FdarError> {
277    let mut components = Vec::with_capacity(ncomp);
278    let mut coefs = Vec::with_capacity(ncomp);
279    let mut norms_sq = Vec::with_capacity(ncomp);
280
281    for k in 0..ncomp {
282        let ck = coefficients[1 + k];
283        coefs.push(ck);
284        let comp: Vec<f64> = (0..m).map(|j| ck * rotation[(j, k)]).collect();
285        let nsq: f64 = comp.iter().map(|v| v * v).sum();
286        norms_sq.push(nsq);
287        components.push(comp);
288    }
289
290    let total_sq: f64 = norms_sq.iter().sum();
291    let variance_proportion = if total_sq > 0.0 {
292        norms_sq.iter().map(|&s| s / total_sq).collect()
293    } else {
294        vec![0.0; ncomp]
295    };
296
297    Ok(BetaDecomposition {
298        components,
299        coefficients: coefs,
300        variance_proportion,
301    })
302}
303
304// ---------------------------------------------------------------------------
305// Significant regions
306// ---------------------------------------------------------------------------
307
308/// Direction of a significant region.
309#[derive(Debug, Clone, Copy, PartialEq, Eq)]
310pub enum SignificanceDirection {
311    Positive,
312    Negative,
313}
314
315/// A contiguous interval where the confidence band excludes zero.
316#[derive(Debug, Clone, PartialEq)]
317pub struct SignificantRegion {
318    /// Start index (inclusive).
319    pub start_idx: usize,
320    /// End index (inclusive).
321    pub end_idx: usize,
322    /// Direction of the effect.
323    pub direction: SignificanceDirection,
324}
325
326/// Identify contiguous regions where the CI `[lower, upper]` excludes zero.
327///
328/// # Errors
329///
330/// Returns [`FdarError::InvalidDimension`] if `lower` is empty or
331/// `lower.len() != upper.len()`.
332#[must_use = "expensive computation whose result should not be discarded"]
333pub fn significant_regions(
334    lower: &[f64],
335    upper: &[f64],
336) -> Result<Vec<SignificantRegion>, FdarError> {
337    if lower.is_empty() {
338        return Err(FdarError::InvalidDimension {
339            parameter: "lower",
340            expected: ">0 length".into(),
341            actual: "0".into(),
342        });
343    }
344    if lower.len() != upper.len() {
345        return Err(FdarError::InvalidDimension {
346            parameter: "upper",
347            expected: format!("{} (matching lower)", lower.len()),
348            actual: format!("{}", upper.len()),
349        });
350    }
351    let n = lower.len();
352    let mut regions = Vec::new();
353    let mut i = 0;
354    while i < n {
355        if let Some(d) = detect_direction(lower[i], upper[i]) {
356            let start = i;
357            i += 1;
358            while i < n && detect_direction(lower[i], upper[i]) == Some(d) {
359                i += 1;
360            }
361            regions.push(SignificantRegion {
362                start_idx: start,
363                end_idx: i - 1,
364                direction: d,
365            });
366        } else {
367            i += 1;
368        }
369    }
370    Ok(regions)
371}
372
373/// Build CI from beta(t) +/- z * SE, then find significant regions.
374///
375/// # Errors
376///
377/// Returns [`FdarError::InvalidDimension`] if `beta_t` is empty or
378/// `beta_t.len() != beta_se.len()`.
379#[must_use = "expensive computation whose result should not be discarded"]
380pub fn significant_regions_from_se(
381    beta_t: &[f64],
382    beta_se: &[f64],
383    z_alpha: f64,
384) -> Result<Vec<SignificantRegion>, FdarError> {
385    if beta_t.is_empty() {
386        return Err(FdarError::InvalidDimension {
387            parameter: "beta_t",
388            expected: ">0 length".into(),
389            actual: "0".into(),
390        });
391    }
392    if beta_t.len() != beta_se.len() {
393        return Err(FdarError::InvalidDimension {
394            parameter: "beta_se",
395            expected: format!("{} (matching beta_t)", beta_t.len()),
396            actual: format!("{}", beta_se.len()),
397        });
398    }
399    let lower: Vec<f64> = beta_t
400        .iter()
401        .zip(beta_se)
402        .map(|(b, s)| b - z_alpha * s)
403        .collect();
404    let upper: Vec<f64> = beta_t
405        .iter()
406        .zip(beta_se)
407        .map(|(b, s)| b + z_alpha * s)
408        .collect();
409    significant_regions(&lower, &upper)
410}
411
412/// Detect significance direction at a single point from CI bounds.
413fn detect_direction(lower: f64, upper: f64) -> Option<SignificanceDirection> {
414    if lower > 0.0 {
415        Some(SignificanceDirection::Positive)
416    } else if upper < 0.0 {
417        Some(SignificanceDirection::Negative)
418    } else {
419        None
420    }
421}