use super::helpers::{
compute_column_means, compute_domain_selection, compute_mean_scalar, compute_saliency_map,
compute_score_variance, compute_sobol_component, generate_sobol_matrices, mean_absolute_column,
project_scores,
};
use crate::error::FdarError;
use crate::matrix::FdMatrix;
use crate::scalar_on_function::{sigmoid, FregreLmResult, FunctionalLogisticResult};
use rand::prelude::*;
/// Result of a Sobol sensitivity decomposition over FPC components.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct SobolIndicesResult {
    /// First-order Sobol index per component (share of output variance
    /// attributable to that component alone).
    pub first_order: Vec<f64>,
    /// Total-order Sobol index per component (includes interaction effects;
    /// equals `first_order` for purely additive models).
    pub total_order: Vec<f64>,
    /// Variance of the model output used as the normalizing denominator.
    pub var_y: f64,
    /// Absolute (un-normalized) variance contribution per component.
    pub component_variance: Vec<f64>,
}
/// Analytic Sobol sensitivity indices for a fitted functional linear model.
///
/// For the additive predictor `y ≈ β₀ + Σₖ βₖ ξₖ`, the variance contributed
/// by component `k` is `βₖ² · Var(ξₖ)`. Because the model has no interaction
/// terms, first-order and total-order indices coincide.
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] when `data`/`y` shapes disagree
/// with the fit, [`FdarError::InvalidParameter`] when `ncomp` is zero, and
/// [`FdarError::ComputationFailed`] when the response variance is
/// (numerically) zero.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn sobol_indices(
    fit: &FregreLmResult,
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
) -> Result<SobolIndicesResult, FdarError> {
    let (n, m) = data.shape();
    if n < 2 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">=2 rows".into(),
            actual: format!("{n}"),
        });
    }
    if n != y.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "y",
            expected: format!("{n} (matching data rows)"),
            actual: format!("{}", y.len()),
        });
    }
    if m != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{m}"),
        });
    }
    // Scalar covariates do not enter the analytic decomposition of the
    // linear model; the parameter exists for signature parity with
    // `sobol_indices_logistic`.
    let _ = scalar_covariates;
    let ncomp = fit.ncomp;
    if ncomp == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "ncomp",
            message: "must be > 0".into(),
        });
    }
    let score_var = compute_score_variance(&fit.fpca.scores, n, ncomp);
    // Variance explained by component k: beta_k^2 * Var(score_k).
    // coefficients[0] is the intercept, hence the `1 + k` offset.
    let component_variance: Vec<f64> = (0..ncomp)
        .map(|k| fit.coefficients[1 + k].powi(2) * score_var[k])
        .collect();
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    // Unbiased sample variance of the response (n - 1 denominator).
    let var_y: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum::<f64>() / (n - 1) as f64;
    // Near-zero threshold (matching `sobol_indices_logistic`) rather than an
    // exact float comparison, which would miss numerically degenerate
    // responses and then divide by a denormal below.
    if var_y < 1e-15 {
        return Err(FdarError::ComputationFailed {
            operation: "sobol_indices",
            detail: "variance of y is zero; all response values may be identical — check your data"
                .into(),
        });
    }
    let first_order: Vec<f64> = component_variance.iter().map(|&cv| cv / var_y).collect();
    // No interaction terms in an additive linear model: total == first order.
    let total_order = first_order.clone();
    Ok(SobolIndicesResult {
        first_order,
        total_order,
        var_y,
        component_variance,
    })
}
/// Monte-Carlo Sobol sensitivity indices for a fitted functional logistic model.
///
/// Two independent score-sample matrices (A and B) are drawn from the
/// empirical FPC score distribution, the fitted probability surface is
/// evaluated on them, and per-component first/total-order indices are
/// estimated via `compute_sobol_component`. Scalar covariates are held
/// fixed at their column means during sampling.
///
/// # Errors
///
/// Fails on shape mismatches, zero `n_samples`/`ncomp`, or when the
/// prediction variance is numerically zero (constant model).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn sobol_indices_logistic(
    fit: &FunctionalLogisticResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
    n_samples: usize,
    seed: u64,
) -> Result<SobolIndicesResult, FdarError> {
    let (n, m) = data.shape();
    if n < 2 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">=2 rows".into(),
            actual: format!("{n}"),
        });
    }
    if m != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{m}"),
        });
    }
    if n_samples == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "n_samples",
            message: "must be > 0".into(),
        });
    }
    let ncomp = fit.ncomp;
    if ncomp == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "ncomp",
            message: "must be > 0".into(),
        });
    }
    let p_scalar = fit.gamma.len();
    let scores = project_scores(
        data,
        &fit.fpca.mean,
        &fit.fpca.rotation,
        ncomp,
        &fit.fpca.weights,
    );
    // Scalar covariates are frozen at their column means while sampling.
    let mean_z = compute_mean_scalar(scalar_covariates, p_scalar, n);
    // Fitted probability for one vector of FPC scores. Folds preserve the
    // left-to-right accumulation order of the original summation.
    let predict = |point: &[f64]| -> f64 {
        let eta = fit.coefficients[1..1 + ncomp]
            .iter()
            .zip(&point[..ncomp])
            .fold(fit.intercept, |acc, (beta, score)| acc + beta * score);
        let eta = fit
            .gamma
            .iter()
            .zip(&mean_z[..p_scalar])
            .fold(eta, |acc, (g, z)| acc + g * z);
        sigmoid(eta)
    };
    let mut rng = StdRng::seed_from_u64(seed);
    let (mat_a, mat_b) = generate_sobol_matrices(&scores, n, ncomp, n_samples, &mut rng);
    let f_a: Vec<f64> = mat_a.iter().map(|row| predict(row)).collect();
    let f_b: Vec<f64> = mat_b.iter().map(|row| predict(row)).collect();
    let mean_fa = f_a.iter().fold(0.0, |acc, &v| acc + v) / n_samples as f64;
    let var_fa =
        f_a.iter().fold(0.0, |acc, &v| acc + (v - mean_fa).powi(2)) / n_samples as f64;
    if var_fa < 1e-15 {
        return Err(FdarError::ComputationFailed {
            operation: "sobol_indices_logistic",
            detail: "variance of predictions is near zero; the model may be constant — check that FPC scores vary across observations".into(),
        });
    }
    let mut first_order = Vec::with_capacity(ncomp);
    let mut total_order = Vec::with_capacity(ncomp);
    let mut component_variance = Vec::with_capacity(ncomp);
    for k in 0..ncomp {
        let (s_k, st_k) = compute_sobol_component(
            &mat_a,
            &mat_b,
            &f_a,
            &f_b,
            var_fa,
            k,
            n_samples,
            &predict,
        );
        first_order.push(s_k);
        total_order.push(st_k);
        // Un-normalized contribution: index times the total variance.
        component_variance.push(s_k * var_fa);
    }
    Ok(SobolIndicesResult {
        first_order,
        total_order,
        var_y: var_fa,
        component_variance,
    })
}
/// Result of a saliency analysis over the functional domain.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct FunctionalSaliencyResult {
    /// Per-observation, per-grid-point saliency values (n rows × m columns).
    pub saliency_map: FdMatrix,
    /// Mean of |saliency| over observations, one entry per grid point.
    pub mean_absolute_saliency: Vec<f64>,
}
/// Saliency map for a fitted functional linear model.
///
/// Projects the observations onto the fitted FPC basis, centers the scores,
/// weights them by the regression coefficients, and maps the result back to
/// the functional domain via the FPCA rotation.
///
/// # Errors
///
/// Fails when `data` is empty, its column count disagrees with the fit, or
/// the fit has zero components.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn functional_saliency(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
) -> Result<FunctionalSaliencyResult, FdarError> {
    let (n_obs, n_points) = data.shape();
    if n_obs == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">0 rows".into(),
            actual: "0".into(),
        });
    }
    if n_points != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{n_points}"),
        });
    }
    // Scalar covariates are not part of the functional saliency computation.
    let _ = scalar_covariates;
    let ncomp = fit.ncomp;
    if ncomp == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "ncomp",
            message: "must be > 0".into(),
        });
    }
    // Regression weights per component; coefficients[0] is the intercept.
    let coef_weights: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
    let fpc_scores = project_scores(
        data,
        &fit.fpca.mean,
        &fit.fpca.rotation,
        ncomp,
        &fit.fpca.weights,
    );
    let score_means = compute_column_means(&fpc_scores, ncomp);
    let saliency_map = compute_saliency_map(
        &fpc_scores,
        &score_means,
        &coef_weights,
        &fit.fpca.rotation,
        n_obs,
        n_points,
        ncomp,
    );
    let mean_absolute_saliency = mean_absolute_column(&saliency_map, n_obs, n_points);
    Ok(FunctionalSaliencyResult {
        saliency_map,
        mean_absolute_saliency,
    })
}
#[must_use = "expensive computation whose result should not be discarded"]
pub fn functional_saliency_logistic(
fit: &FunctionalLogisticResult,
) -> Result<FunctionalSaliencyResult, FdarError> {
let m = fit.beta_t.len();
let n = fit.probabilities.len();
if n == 0 {
return Err(FdarError::InvalidDimension {
parameter: "probabilities",
expected: ">0 length".into(),
actual: "0".into(),
});
}
if m == 0 {
return Err(FdarError::InvalidDimension {
parameter: "beta_t",
expected: ">0 length".into(),
actual: "0".into(),
});
}
let mut saliency_map = FdMatrix::zeros(n, m);
for i in 0..n {
let pi = fit.probabilities[i];
let w = pi * (1.0 - pi);
for j in 0..m {
saliency_map[(i, j)] = w * fit.beta_t[j];
}
}
let mut mean_absolute_saliency = vec![0.0; m];
for j in 0..m {
for i in 0..n {
mean_absolute_saliency[j] += saliency_map[(i, j)].abs();
}
mean_absolute_saliency[j] /= n as f64;
}
Ok(FunctionalSaliencyResult {
saliency_map,
mean_absolute_saliency,
})
}
/// A contiguous region of the domain flagged as important by domain selection.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ImportantInterval {
    /// Index of the first grid point in the interval (inclusive).
    pub start_idx: usize,
    /// Index of the last grid point in the interval.
    pub end_idx: usize,
    /// Importance score assigned to the interval.
    pub importance: f64,
}
/// Result of windowed domain selection on a coefficient function.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct DomainSelectionResult {
    /// Importance value at each grid point.
    pub pointwise_importance: Vec<f64>,
    /// Contiguous intervals whose importance exceeded the threshold.
    pub intervals: Vec<ImportantInterval>,
    /// Smoothing-window width (in grid points) used for the computation.
    pub window_width: usize,
    /// Importance threshold used to flag intervals.
    pub threshold: f64,
}
/// Windowed domain selection on the coefficient function of a fitted
/// functional linear model.
///
/// # Errors
///
/// Returns [`FdarError::InvalidParameter`] when the helper rejects the
/// inputs (empty `beta_t`, degenerate `window_width`, or bad `threshold`).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn domain_selection(
    fit: &FregreLmResult,
    window_width: usize,
    threshold: f64,
) -> Result<DomainSelectionResult, FdarError> {
    match compute_domain_selection(&fit.beta_t, window_width, threshold) {
        Some(result) => Ok(result),
        None => Err(FdarError::InvalidParameter {
            parameter: "domain_selection",
            message: "invalid beta_t, window_width, or threshold".into(),
        }),
    }
}
/// Windowed domain selection on the coefficient function of a fitted
/// functional logistic model.
///
/// # Errors
///
/// Returns [`FdarError::InvalidParameter`] when the helper rejects the
/// inputs (empty `beta_t`, degenerate `window_width`, or bad `threshold`).
#[must_use = "expensive computation whose result should not be discarded"]
pub fn domain_selection_logistic(
    fit: &FunctionalLogisticResult,
    window_width: usize,
    threshold: f64,
) -> Result<DomainSelectionResult, FdarError> {
    match compute_domain_selection(&fit.beta_t, window_width, threshold) {
        Some(result) => Ok(result),
        None => Err(FdarError::InvalidParameter {
            parameter: "domain_selection_logistic",
            message: "invalid beta_t, window_width, or threshold".into(),
        }),
    }
}