Skip to main content

fdars_core/explain/
pdp.rs

1//! PDP/ICE and beta decomposition.
2
3use super::helpers::{ice_to_pdp, logistic_eta_base, make_grid, project_scores};
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{sigmoid, FregreLmResult, FunctionalLogisticResult};
7
8/// Result of a functional partial dependence plot.
9#[derive(Debug, Clone, PartialEq)]
10#[non_exhaustive]
11pub struct FunctionalPdpResult {
12    /// FPC score grid values (length n_grid).
13    pub grid_values: Vec<f64>,
14    /// Average prediction across observations at each grid point (length n_grid).
15    pub pdp_curve: Vec<f64>,
16    /// Individual conditional expectation curves (n x n_grid).
17    pub ice_curves: FdMatrix,
18    /// Which FPC component was varied.
19    pub component: usize,
20}
21
22/// Functional PDP/ICE for a linear functional regression model.
23///
24/// Varies the FPC score for `component` across a grid while keeping other scores
25/// fixed, producing ICE curves and their average (PDP).
26///
27/// For a linear model, ICE curves are parallel lines (same slope, different intercepts).
28///
29/// # Arguments
30/// * `fit` -- A fitted [`FregreLmResult`]
31/// * `data` -- Original functional predictor matrix (n x m)
32/// * `scalar_covariates` -- Optional scalar covariates (n x p)
33/// * `component` -- Which FPC component to vary (0-indexed, must be < fit.ncomp)
34/// * `n_grid` -- Number of grid points (must be >= 2)
35///
36/// # Errors
37///
38/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows, its column
39/// count does not match `fit.fpca.mean`, or its row count does not match
40/// `fit.fitted_values`.
41/// Returns [`FdarError::InvalidParameter`] if `component >= fit.ncomp` or
42/// `n_grid < 2`.
43///
44/// # Examples
45///
46/// ```
47/// use fdars_core::matrix::FdMatrix;
48/// use fdars_core::scalar_on_function::fregre_lm;
49/// use fdars_core::explain::functional_pdp;
50///
51/// let (n, m) = (20, 30);
52/// let data = FdMatrix::from_column_major(
53///     (0..n * m).map(|k| {
54///         let i = (k % n) as f64;
55///         let j = (k / n) as f64;
56///         ((i + 1.0) * j * 0.2).sin()
57///     }).collect(),
58///     n, m,
59/// ).unwrap();
60/// let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.5).sin()).collect();
61/// let fit = fregre_lm(&data, &y, None, 3).unwrap();
62/// let pdp = functional_pdp(&fit, &data, None, 0, 10).unwrap();
63/// assert_eq!(pdp.pdp_curve.len(), 10);
64/// assert_eq!(pdp.ice_curves.shape(), (20, 10));
65/// ```
66#[must_use = "expensive computation whose result should not be discarded"]
67pub fn functional_pdp(
68    fit: &FregreLmResult,
69    data: &FdMatrix,
70    _scalar_covariates: Option<&FdMatrix>,
71    component: usize,
72    n_grid: usize,
73) -> Result<FunctionalPdpResult, FdarError> {
74    let (n, m) = data.shape();
75    if n == 0 {
76        return Err(FdarError::InvalidDimension {
77            parameter: "data",
78            expected: ">0 rows".into(),
79            actual: "0".into(),
80        });
81    }
82    if m != fit.fpca.mean.len() {
83        return Err(FdarError::InvalidDimension {
84            parameter: "data",
85            expected: format!("{} columns", fit.fpca.mean.len()),
86            actual: format!("{m}"),
87        });
88    }
89    if n != fit.fitted_values.len() {
90        return Err(FdarError::InvalidDimension {
91            parameter: "data",
92            expected: format!("{} rows matching fitted_values", fit.fitted_values.len()),
93            actual: format!("{n}"),
94        });
95    }
96    if component >= fit.ncomp {
97        return Err(FdarError::InvalidParameter {
98            parameter: "component",
99            message: format!("component {} >= ncomp {}", component, fit.ncomp),
100        });
101    }
102    if n_grid < 2 {
103        return Err(FdarError::InvalidParameter {
104            parameter: "n_grid",
105            message: "must be >= 2".into(),
106        });
107    }
108
109    let ncomp = fit.ncomp;
110    let scores = project_scores(
111        data,
112        &fit.fpca.mean,
113        &fit.fpca.rotation,
114        ncomp,
115        &fit.fpca.weights,
116    );
117    let grid_values = make_grid(&scores, component, n_grid);
118
119    let coef_c = fit.coefficients[1 + component];
120    let mut ice_curves = FdMatrix::zeros(n, n_grid);
121    for i in 0..n {
122        let base = fit.fitted_values[i] - coef_c * scores[(i, component)];
123        for g in 0..n_grid {
124            ice_curves[(i, g)] = base + coef_c * grid_values[g];
125        }
126    }
127
128    let pdp_curve = ice_to_pdp(&ice_curves, n, n_grid);
129
130    Ok(FunctionalPdpResult {
131        grid_values,
132        pdp_curve,
133        ice_curves,
134        component,
135    })
136}
137
138/// Functional PDP/ICE for a functional logistic regression model.
139///
140/// Predictions pass through sigmoid, so ICE curves are non-parallel.
141///
142/// # Arguments
143/// * `fit` -- A fitted [`FunctionalLogisticResult`]
144/// * `data` -- Original functional predictor matrix (n x m)
145/// * `scalar_covariates` -- Optional scalar covariates (n x p)
146/// * `component` -- Which FPC component to vary (0-indexed, must be < fit.ncomp)
147/// * `n_grid` -- Number of grid points (must be >= 2)
148///
149/// # Errors
150///
151/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
152/// count does not match `fit.fpca.mean`.
153/// Returns [`FdarError::InvalidParameter`] if `component >= fit.ncomp`,
154/// `n_grid < 2`, or `scalar_covariates` is `None` when the model has scalar
155/// covariates.
156#[must_use = "expensive computation whose result should not be discarded"]
157pub fn functional_pdp_logistic(
158    fit: &FunctionalLogisticResult,
159    data: &FdMatrix,
160    scalar_covariates: Option<&FdMatrix>,
161    component: usize,
162    n_grid: usize,
163) -> Result<FunctionalPdpResult, FdarError> {
164    let (n, m) = data.shape();
165    if n == 0 {
166        return Err(FdarError::InvalidDimension {
167            parameter: "data",
168            expected: ">0 rows".into(),
169            actual: "0".into(),
170        });
171    }
172    if m != fit.fpca.mean.len() {
173        return Err(FdarError::InvalidDimension {
174            parameter: "data",
175            expected: format!("{} columns", fit.fpca.mean.len()),
176            actual: format!("{m}"),
177        });
178    }
179    if component >= fit.ncomp {
180        return Err(FdarError::InvalidParameter {
181            parameter: "component",
182            message: format!("component {} >= ncomp {}", component, fit.ncomp),
183        });
184    }
185    if n_grid < 2 {
186        return Err(FdarError::InvalidParameter {
187            parameter: "n_grid",
188            message: "must be >= 2".into(),
189        });
190    }
191
192    let ncomp = fit.ncomp;
193    let p_scalar = fit.gamma.len();
194    if p_scalar > 0 && scalar_covariates.is_none() {
195        return Err(FdarError::InvalidParameter {
196            parameter: "scalar_covariates",
197            message: "required when model has scalar covariates".into(),
198        });
199    }
200
201    let scores = project_scores(
202        data,
203        &fit.fpca.mean,
204        &fit.fpca.rotation,
205        ncomp,
206        &fit.fpca.weights,
207    );
208    let grid_values = make_grid(&scores, component, n_grid);
209
210    let mut ice_curves = FdMatrix::zeros(n, n_grid);
211    let coef_c = fit.coefficients[1 + component];
212    for i in 0..n {
213        let eta_base = logistic_eta_base(
214            fit.intercept,
215            &fit.coefficients,
216            &fit.gamma,
217            &scores,
218            scalar_covariates,
219            i,
220            ncomp,
221            component,
222        );
223        for g in 0..n_grid {
224            ice_curves[(i, g)] = sigmoid(eta_base + coef_c * grid_values[g]);
225        }
226    }
227
228    let pdp_curve = ice_to_pdp(&ice_curves, n, n_grid);
229
230    Ok(FunctionalPdpResult {
231        grid_values,
232        pdp_curve,
233        ice_curves,
234        component,
235    })
236}
237
238// ---------------------------------------------------------------------------
239// Beta decomposition
240// ---------------------------------------------------------------------------
241
242/// Per-FPC decomposition of the functional coefficient beta(t).
243#[derive(Debug, Clone, PartialEq)]
244pub struct BetaDecomposition {
245    /// `components[k]` = coef_k * phi_k(t), each of length m.
246    pub components: Vec<Vec<f64>>,
247    /// FPC regression coefficients (length ncomp).
248    pub coefficients: Vec<f64>,
249    /// Proportion of ||beta(t)||^2 explained by each component.
250    pub variance_proportion: Vec<f64>,
251}
252
253/// Decompose beta(t) = sum_k coef_k * phi_k(t) for a linear functional regression.
254///
255/// # Errors
256///
257/// Returns [`FdarError::InvalidParameter`] if `fit.ncomp` is zero.
258/// Returns [`FdarError::InvalidDimension`] if `fit.fpca.mean` has zero length.
259#[must_use = "expensive computation whose result should not be discarded"]
260pub fn beta_decomposition(fit: &FregreLmResult) -> Result<BetaDecomposition, FdarError> {
261    let ncomp = fit.ncomp;
262    let m = fit.fpca.mean.len();
263    if ncomp == 0 {
264        return Err(FdarError::InvalidParameter {
265            parameter: "ncomp",
266            message: "must be > 0".into(),
267        });
268    }
269    if m == 0 {
270        return Err(FdarError::InvalidDimension {
271            parameter: "fpca.mean",
272            expected: ">0 length".into(),
273            actual: "0".into(),
274        });
275    }
276    Ok(decompose_beta(
277        &fit.coefficients,
278        &fit.fpca.rotation,
279        ncomp,
280        m,
281    ))
282}
283
284/// Decompose beta(t) for a functional logistic regression.
285///
286/// # Errors
287///
288/// Returns [`FdarError::InvalidParameter`] if `fit.ncomp` is zero.
289/// Returns [`FdarError::InvalidDimension`] if `fit.fpca.mean` has zero length.
290#[must_use = "expensive computation whose result should not be discarded"]
291pub fn beta_decomposition_logistic(
292    fit: &FunctionalLogisticResult,
293) -> Result<BetaDecomposition, FdarError> {
294    let ncomp = fit.ncomp;
295    let m = fit.fpca.mean.len();
296    if ncomp == 0 {
297        return Err(FdarError::InvalidParameter {
298            parameter: "ncomp",
299            message: "must be > 0".into(),
300        });
301    }
302    if m == 0 {
303        return Err(FdarError::InvalidDimension {
304            parameter: "fpca.mean",
305            expected: ">0 length".into(),
306            actual: "0".into(),
307        });
308    }
309    Ok(decompose_beta(
310        &fit.coefficients,
311        &fit.fpca.rotation,
312        ncomp,
313        m,
314    ))
315}
316
317fn decompose_beta(
318    coefficients: &[f64],
319    rotation: &FdMatrix,
320    ncomp: usize,
321    m: usize,
322) -> BetaDecomposition {
323    let mut components = Vec::with_capacity(ncomp);
324    let mut coefs = Vec::with_capacity(ncomp);
325    let mut norms_sq = Vec::with_capacity(ncomp);
326
327    for k in 0..ncomp {
328        let ck = coefficients[1 + k];
329        coefs.push(ck);
330        let comp: Vec<f64> = (0..m).map(|j| ck * rotation[(j, k)]).collect();
331        let nsq: f64 = comp.iter().map(|v| v * v).sum();
332        norms_sq.push(nsq);
333        components.push(comp);
334    }
335
336    let total_sq: f64 = norms_sq.iter().sum();
337    let variance_proportion = if total_sq > 0.0 {
338        norms_sq.iter().map(|&s| s / total_sq).collect()
339    } else {
340        vec![0.0; ncomp]
341    };
342
343    BetaDecomposition {
344        components,
345        coefficients: coefs,
346        variance_proportion,
347    }
348}
349
350// ---------------------------------------------------------------------------
351// Significant regions
352// ---------------------------------------------------------------------------
353
354/// Direction of a significant region.
355#[derive(Debug, Clone, Copy, PartialEq, Eq)]
356#[non_exhaustive]
357pub enum SignificanceDirection {
358    Positive,
359    Negative,
360}
361
362/// A contiguous interval where the confidence band excludes zero.
363#[derive(Debug, Clone, PartialEq)]
364#[non_exhaustive]
365pub struct SignificantRegion {
366    /// Start index (inclusive).
367    pub start_idx: usize,
368    /// End index (inclusive).
369    pub end_idx: usize,
370    /// Direction of the effect.
371    pub direction: SignificanceDirection,
372}
373
374/// Identify contiguous regions where the CI `[lower, upper]` excludes zero.
375///
376/// # Errors
377///
378/// Returns [`FdarError::InvalidDimension`] if `lower` is empty or
379/// `lower.len() != upper.len()`.
380#[must_use = "expensive computation whose result should not be discarded"]
381pub fn significant_regions(
382    lower: &[f64],
383    upper: &[f64],
384) -> Result<Vec<SignificantRegion>, FdarError> {
385    if lower.is_empty() {
386        return Err(FdarError::InvalidDimension {
387            parameter: "lower",
388            expected: ">0 length".into(),
389            actual: "0".into(),
390        });
391    }
392    if lower.len() != upper.len() {
393        return Err(FdarError::InvalidDimension {
394            parameter: "upper",
395            expected: format!("{} (matching lower)", lower.len()),
396            actual: format!("{}", upper.len()),
397        });
398    }
399    let n = lower.len();
400    let mut regions = Vec::new();
401    let mut i = 0;
402    while i < n {
403        if let Some(d) = detect_direction(lower[i], upper[i]) {
404            let start = i;
405            i += 1;
406            while i < n && detect_direction(lower[i], upper[i]) == Some(d) {
407                i += 1;
408            }
409            regions.push(SignificantRegion {
410                start_idx: start,
411                end_idx: i - 1,
412                direction: d,
413            });
414        } else {
415            i += 1;
416        }
417    }
418    Ok(regions)
419}
420
421/// Build CI from beta(t) +/- z * SE, then find significant regions.
422///
423/// # Errors
424///
425/// Returns [`FdarError::InvalidDimension`] if `beta_t` is empty or
426/// `beta_t.len() != beta_se.len()`.
427#[must_use = "expensive computation whose result should not be discarded"]
428pub fn significant_regions_from_se(
429    beta_t: &[f64],
430    beta_se: &[f64],
431    z_alpha: f64,
432) -> Result<Vec<SignificantRegion>, FdarError> {
433    if beta_t.is_empty() {
434        return Err(FdarError::InvalidDimension {
435            parameter: "beta_t",
436            expected: ">0 length".into(),
437            actual: "0".into(),
438        });
439    }
440    if beta_t.len() != beta_se.len() {
441        return Err(FdarError::InvalidDimension {
442            parameter: "beta_se",
443            expected: format!("{} (matching beta_t)", beta_t.len()),
444            actual: format!("{}", beta_se.len()),
445        });
446    }
447    let lower: Vec<f64> = beta_t
448        .iter()
449        .zip(beta_se)
450        .map(|(b, s)| b - z_alpha * s)
451        .collect();
452    let upper: Vec<f64> = beta_t
453        .iter()
454        .zip(beta_se)
455        .map(|(b, s)| b + z_alpha * s)
456        .collect();
457    significant_regions(&lower, &upper)
458}
459
460/// Detect significance direction at a single point from CI bounds.
461fn detect_direction(lower: f64, upper: f64) -> Option<SignificanceDirection> {
462    if lower > 0.0 {
463        Some(SignificanceDirection::Positive)
464    } else if upper < 0.0 {
465        Some(SignificanceDirection::Negative)
466    } else {
467        None
468    }
469}