Skip to main content

fdars_core/explain/
pdp.rs

1//! PDP/ICE and beta decomposition.
2
3use super::helpers::{ice_to_pdp, logistic_eta_base, make_grid, project_scores};
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{sigmoid, FregreLmResult, FunctionalLogisticResult};
7
8/// Result of a functional partial dependence plot.
9#[derive(Debug, Clone, PartialEq)]
10#[non_exhaustive]
11pub struct FunctionalPdpResult {
12    /// FPC score grid values (length n_grid).
13    pub grid_values: Vec<f64>,
14    /// Average prediction across observations at each grid point (length n_grid).
15    pub pdp_curve: Vec<f64>,
16    /// Individual conditional expectation curves (n x n_grid).
17    pub ice_curves: FdMatrix,
18    /// Which FPC component was varied.
19    pub component: usize,
20}
21
22/// Functional PDP/ICE for a linear functional regression model.
23///
24/// Varies the FPC score for `component` across a grid while keeping other scores
25/// fixed, producing ICE curves and their average (PDP).
26///
27/// For a linear model, ICE curves are parallel lines (same slope, different intercepts).
28///
29/// # Arguments
30/// * `fit` -- A fitted [`FregreLmResult`]
31/// * `data` -- Original functional predictor matrix (n x m)
32/// * `scalar_covariates` -- Optional scalar covariates (n x p)
33/// * `component` -- Which FPC component to vary (0-indexed, must be < fit.ncomp)
34/// * `n_grid` -- Number of grid points (must be >= 2)
35///
36/// # Errors
37///
38/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows, its column
39/// count does not match `fit.fpca.mean`, or its row count does not match
40/// `fit.fitted_values`.
41/// Returns [`FdarError::InvalidParameter`] if `component >= fit.ncomp` or
42/// `n_grid < 2`.
43///
44/// # Examples
45///
46/// ```
47/// use fdars_core::matrix::FdMatrix;
48/// use fdars_core::scalar_on_function::fregre_lm;
49/// use fdars_core::explain::functional_pdp;
50///
51/// let (n, m) = (20, 30);
52/// let data = FdMatrix::from_column_major(
53///     (0..n * m).map(|k| {
54///         let i = (k % n) as f64;
55///         let j = (k / n) as f64;
56///         ((i + 1.0) * j * 0.2).sin()
57///     }).collect(),
58///     n, m,
59/// ).unwrap();
60/// let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.5).sin()).collect();
61/// let fit = fregre_lm(&data, &y, None, 3).unwrap();
62/// let pdp = functional_pdp(&fit, &data, None, 0, 10).unwrap();
63/// assert_eq!(pdp.pdp_curve.len(), 10);
64/// assert_eq!(pdp.ice_curves.shape(), (20, 10));
65/// ```
66#[must_use = "expensive computation whose result should not be discarded"]
67pub fn functional_pdp(
68    fit: &FregreLmResult,
69    data: &FdMatrix,
70    _scalar_covariates: Option<&FdMatrix>,
71    component: usize,
72    n_grid: usize,
73) -> Result<FunctionalPdpResult, FdarError> {
74    let (n, m) = data.shape();
75    if n == 0 {
76        return Err(FdarError::InvalidDimension {
77            parameter: "data",
78            expected: ">0 rows".into(),
79            actual: "0".into(),
80        });
81    }
82    if m != fit.fpca.mean.len() {
83        return Err(FdarError::InvalidDimension {
84            parameter: "data",
85            expected: format!("{} columns", fit.fpca.mean.len()),
86            actual: format!("{m}"),
87        });
88    }
89    if n != fit.fitted_values.len() {
90        return Err(FdarError::InvalidDimension {
91            parameter: "data",
92            expected: format!("{} rows matching fitted_values", fit.fitted_values.len()),
93            actual: format!("{n}"),
94        });
95    }
96    if component >= fit.ncomp {
97        return Err(FdarError::InvalidParameter {
98            parameter: "component",
99            message: format!("component {} >= ncomp {}", component, fit.ncomp),
100        });
101    }
102    if n_grid < 2 {
103        return Err(FdarError::InvalidParameter {
104            parameter: "n_grid",
105            message: "must be >= 2".into(),
106        });
107    }
108
109    let ncomp = fit.ncomp;
110    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
111    let grid_values = make_grid(&scores, component, n_grid);
112
113    let coef_c = fit.coefficients[1 + component];
114    let mut ice_curves = FdMatrix::zeros(n, n_grid);
115    for i in 0..n {
116        let base = fit.fitted_values[i] - coef_c * scores[(i, component)];
117        for g in 0..n_grid {
118            ice_curves[(i, g)] = base + coef_c * grid_values[g];
119        }
120    }
121
122    let pdp_curve = ice_to_pdp(&ice_curves, n, n_grid);
123
124    Ok(FunctionalPdpResult {
125        grid_values,
126        pdp_curve,
127        ice_curves,
128        component,
129    })
130}
131
132/// Functional PDP/ICE for a functional logistic regression model.
133///
134/// Predictions pass through sigmoid, so ICE curves are non-parallel.
135///
136/// # Arguments
137/// * `fit` -- A fitted [`FunctionalLogisticResult`]
138/// * `data` -- Original functional predictor matrix (n x m)
139/// * `scalar_covariates` -- Optional scalar covariates (n x p)
140/// * `component` -- Which FPC component to vary (0-indexed, must be < fit.ncomp)
141/// * `n_grid` -- Number of grid points (must be >= 2)
142///
143/// # Errors
144///
145/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
146/// count does not match `fit.fpca.mean`.
147/// Returns [`FdarError::InvalidParameter`] if `component >= fit.ncomp`,
148/// `n_grid < 2`, or `scalar_covariates` is `None` when the model has scalar
149/// covariates.
150#[must_use = "expensive computation whose result should not be discarded"]
151pub fn functional_pdp_logistic(
152    fit: &FunctionalLogisticResult,
153    data: &FdMatrix,
154    scalar_covariates: Option<&FdMatrix>,
155    component: usize,
156    n_grid: usize,
157) -> Result<FunctionalPdpResult, FdarError> {
158    let (n, m) = data.shape();
159    if n == 0 {
160        return Err(FdarError::InvalidDimension {
161            parameter: "data",
162            expected: ">0 rows".into(),
163            actual: "0".into(),
164        });
165    }
166    if m != fit.fpca.mean.len() {
167        return Err(FdarError::InvalidDimension {
168            parameter: "data",
169            expected: format!("{} columns", fit.fpca.mean.len()),
170            actual: format!("{m}"),
171        });
172    }
173    if component >= fit.ncomp {
174        return Err(FdarError::InvalidParameter {
175            parameter: "component",
176            message: format!("component {} >= ncomp {}", component, fit.ncomp),
177        });
178    }
179    if n_grid < 2 {
180        return Err(FdarError::InvalidParameter {
181            parameter: "n_grid",
182            message: "must be >= 2".into(),
183        });
184    }
185
186    let ncomp = fit.ncomp;
187    let p_scalar = fit.gamma.len();
188    if p_scalar > 0 && scalar_covariates.is_none() {
189        return Err(FdarError::InvalidParameter {
190            parameter: "scalar_covariates",
191            message: "required when model has scalar covariates".into(),
192        });
193    }
194
195    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
196    let grid_values = make_grid(&scores, component, n_grid);
197
198    let mut ice_curves = FdMatrix::zeros(n, n_grid);
199    let coef_c = fit.coefficients[1 + component];
200    for i in 0..n {
201        let eta_base = logistic_eta_base(
202            fit.intercept,
203            &fit.coefficients,
204            &fit.gamma,
205            &scores,
206            scalar_covariates,
207            i,
208            ncomp,
209            component,
210        );
211        for g in 0..n_grid {
212            ice_curves[(i, g)] = sigmoid(eta_base + coef_c * grid_values[g]);
213        }
214    }
215
216    let pdp_curve = ice_to_pdp(&ice_curves, n, n_grid);
217
218    Ok(FunctionalPdpResult {
219        grid_values,
220        pdp_curve,
221        ice_curves,
222        component,
223    })
224}
225
226// ---------------------------------------------------------------------------
227// Beta decomposition
228// ---------------------------------------------------------------------------
229
230/// Per-FPC decomposition of the functional coefficient beta(t).
231#[derive(Debug, Clone, PartialEq)]
232pub struct BetaDecomposition {
233    /// `components[k]` = coef_k * phi_k(t), each of length m.
234    pub components: Vec<Vec<f64>>,
235    /// FPC regression coefficients (length ncomp).
236    pub coefficients: Vec<f64>,
237    /// Proportion of ||beta(t)||^2 explained by each component.
238    pub variance_proportion: Vec<f64>,
239}
240
241/// Decompose beta(t) = sum_k coef_k * phi_k(t) for a linear functional regression.
242///
243/// # Errors
244///
245/// Returns [`FdarError::InvalidParameter`] if `fit.ncomp` is zero.
246/// Returns [`FdarError::InvalidDimension`] if `fit.fpca.mean` has zero length.
247#[must_use = "expensive computation whose result should not be discarded"]
248pub fn beta_decomposition(fit: &FregreLmResult) -> Result<BetaDecomposition, FdarError> {
249    let ncomp = fit.ncomp;
250    let m = fit.fpca.mean.len();
251    if ncomp == 0 {
252        return Err(FdarError::InvalidParameter {
253            parameter: "ncomp",
254            message: "must be > 0".into(),
255        });
256    }
257    if m == 0 {
258        return Err(FdarError::InvalidDimension {
259            parameter: "fpca.mean",
260            expected: ">0 length".into(),
261            actual: "0".into(),
262        });
263    }
264    Ok(decompose_beta(
265        &fit.coefficients,
266        &fit.fpca.rotation,
267        ncomp,
268        m,
269    ))
270}
271
272/// Decompose beta(t) for a functional logistic regression.
273///
274/// # Errors
275///
276/// Returns [`FdarError::InvalidParameter`] if `fit.ncomp` is zero.
277/// Returns [`FdarError::InvalidDimension`] if `fit.fpca.mean` has zero length.
278#[must_use = "expensive computation whose result should not be discarded"]
279pub fn beta_decomposition_logistic(
280    fit: &FunctionalLogisticResult,
281) -> Result<BetaDecomposition, FdarError> {
282    let ncomp = fit.ncomp;
283    let m = fit.fpca.mean.len();
284    if ncomp == 0 {
285        return Err(FdarError::InvalidParameter {
286            parameter: "ncomp",
287            message: "must be > 0".into(),
288        });
289    }
290    if m == 0 {
291        return Err(FdarError::InvalidDimension {
292            parameter: "fpca.mean",
293            expected: ">0 length".into(),
294            actual: "0".into(),
295        });
296    }
297    Ok(decompose_beta(
298        &fit.coefficients,
299        &fit.fpca.rotation,
300        ncomp,
301        m,
302    ))
303}
304
305fn decompose_beta(
306    coefficients: &[f64],
307    rotation: &FdMatrix,
308    ncomp: usize,
309    m: usize,
310) -> BetaDecomposition {
311    let mut components = Vec::with_capacity(ncomp);
312    let mut coefs = Vec::with_capacity(ncomp);
313    let mut norms_sq = Vec::with_capacity(ncomp);
314
315    for k in 0..ncomp {
316        let ck = coefficients[1 + k];
317        coefs.push(ck);
318        let comp: Vec<f64> = (0..m).map(|j| ck * rotation[(j, k)]).collect();
319        let nsq: f64 = comp.iter().map(|v| v * v).sum();
320        norms_sq.push(nsq);
321        components.push(comp);
322    }
323
324    let total_sq: f64 = norms_sq.iter().sum();
325    let variance_proportion = if total_sq > 0.0 {
326        norms_sq.iter().map(|&s| s / total_sq).collect()
327    } else {
328        vec![0.0; ncomp]
329    };
330
331    BetaDecomposition {
332        components,
333        coefficients: coefs,
334        variance_proportion,
335    }
336}
337
338// ---------------------------------------------------------------------------
339// Significant regions
340// ---------------------------------------------------------------------------
341
342/// Direction of a significant region.
343#[derive(Debug, Clone, Copy, PartialEq, Eq)]
344#[non_exhaustive]
345pub enum SignificanceDirection {
346    Positive,
347    Negative,
348}
349
350/// A contiguous interval where the confidence band excludes zero.
351#[derive(Debug, Clone, PartialEq)]
352#[non_exhaustive]
353pub struct SignificantRegion {
354    /// Start index (inclusive).
355    pub start_idx: usize,
356    /// End index (inclusive).
357    pub end_idx: usize,
358    /// Direction of the effect.
359    pub direction: SignificanceDirection,
360}
361
362/// Identify contiguous regions where the CI `[lower, upper]` excludes zero.
363///
364/// # Errors
365///
366/// Returns [`FdarError::InvalidDimension`] if `lower` is empty or
367/// `lower.len() != upper.len()`.
368#[must_use = "expensive computation whose result should not be discarded"]
369pub fn significant_regions(
370    lower: &[f64],
371    upper: &[f64],
372) -> Result<Vec<SignificantRegion>, FdarError> {
373    if lower.is_empty() {
374        return Err(FdarError::InvalidDimension {
375            parameter: "lower",
376            expected: ">0 length".into(),
377            actual: "0".into(),
378        });
379    }
380    if lower.len() != upper.len() {
381        return Err(FdarError::InvalidDimension {
382            parameter: "upper",
383            expected: format!("{} (matching lower)", lower.len()),
384            actual: format!("{}", upper.len()),
385        });
386    }
387    let n = lower.len();
388    let mut regions = Vec::new();
389    let mut i = 0;
390    while i < n {
391        if let Some(d) = detect_direction(lower[i], upper[i]) {
392            let start = i;
393            i += 1;
394            while i < n && detect_direction(lower[i], upper[i]) == Some(d) {
395                i += 1;
396            }
397            regions.push(SignificantRegion {
398                start_idx: start,
399                end_idx: i - 1,
400                direction: d,
401            });
402        } else {
403            i += 1;
404        }
405    }
406    Ok(regions)
407}
408
409/// Build CI from beta(t) +/- z * SE, then find significant regions.
410///
411/// # Errors
412///
413/// Returns [`FdarError::InvalidDimension`] if `beta_t` is empty or
414/// `beta_t.len() != beta_se.len()`.
415#[must_use = "expensive computation whose result should not be discarded"]
416pub fn significant_regions_from_se(
417    beta_t: &[f64],
418    beta_se: &[f64],
419    z_alpha: f64,
420) -> Result<Vec<SignificantRegion>, FdarError> {
421    if beta_t.is_empty() {
422        return Err(FdarError::InvalidDimension {
423            parameter: "beta_t",
424            expected: ">0 length".into(),
425            actual: "0".into(),
426        });
427    }
428    if beta_t.len() != beta_se.len() {
429        return Err(FdarError::InvalidDimension {
430            parameter: "beta_se",
431            expected: format!("{} (matching beta_t)", beta_t.len()),
432            actual: format!("{}", beta_se.len()),
433        });
434    }
435    let lower: Vec<f64> = beta_t
436        .iter()
437        .zip(beta_se)
438        .map(|(b, s)| b - z_alpha * s)
439        .collect();
440    let upper: Vec<f64> = beta_t
441        .iter()
442        .zip(beta_se)
443        .map(|(b, s)| b + z_alpha * s)
444        .collect();
445    significant_regions(&lower, &upper)
446}
447
448/// Detect significance direction at a single point from CI bounds.
449fn detect_direction(lower: f64, upper: f64) -> Option<SignificanceDirection> {
450    if lower > 0.0 {
451        Some(SignificanceDirection::Positive)
452    } else if upper < 0.0 {
453        Some(SignificanceDirection::Negative)
454    } else {
455        None
456    }
457}