use crate::matrix::FdMatrix;
use std::collections::HashMap;
/// Container for one functional-data analysis: optional curves with their
/// evaluation grid, optional tabular data, per-observation metadata, and a map
/// of attached analysis layers.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FdaData {
/// Functional curves; when present, `n_obs` reports this matrix's row count.
pub curves: Option<FdMatrix>,
/// Evaluation grid paired with `curves`; `n_points` reports its length.
pub argvals: Option<Vec<f64>>,
/// Grouping variables, appended by `add_grouping`.
pub grouping: Vec<GroupVar>,
/// Named scalar variables, appended by `add_scalar`.
pub scalar_vars: Vec<NamedVec>,
/// Tabular data; `n_obs` falls back to its row count when `curves` is `None`.
pub tabular: Option<FdMatrix>,
/// Column names supplied together with `tabular` by `from_tabular`.
pub column_names: Option<Vec<String>>,
/// Attached analysis results, at most one per `LayerKey`.
pub layers: HashMap<LayerKey, Layer>,
}
/// A vector of `f64` values tagged with a name; the element type of
/// `FdaData::scalar_vars`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NamedVec {
/// Name used for lookup by `FdaData::get_scalar`.
pub name: String,
/// The stored values (presumably one per observation -- not enforced here).
pub values: Vec<f64>,
}
/// A categorical grouping variable; the element type of `FdaData::grouping`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct GroupVar {
/// Name used for lookup by `FdaData::get_grouping`.
pub name: String,
/// Labels exactly as supplied (presumably one per observation -- not enforced here).
pub labels: Vec<String>,
/// Distinct labels; `FdaData::add_grouping` fills this in first-appearance order.
pub unique: Vec<String>,
}
/// Key under which a `Layer` is stored in `FdaData::layers`.
///
/// The typed accessors on `FdaData` (`fpca`, `distances`, `alignment`,
/// `regression`, `clusters`, `depth`, `outliers`) look up the same-named key
/// and expect the same-named `Layer` variant. Nothing in this file enforces
/// that pairing at insertion time.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
// Marked non-exhaustive so new keys can be added without breaking downstream matches.
#[non_exhaustive]
pub enum LayerKey {
/// Key read by `FdaData::fpca`.
Fpca,
/// Key for a PLS layer (by naming convention pairs with `Layer::Pls`).
Pls,
/// Key read by `FdaData::alignment`.
Alignment,
/// Key read by `FdaData::distances`.
Distances,
/// Key read by `FdaData::depth`.
Depth,
/// Key read by `FdaData::outliers`.
Outliers,
/// Key read by `FdaData::clusters`.
Clusters,
/// Key read by `FdaData::regression`.
Regression,
/// Key for a function-on-scalar layer (by naming convention pairs with `Layer::FunctionOnScalar`).
FunctionOnScalar,
/// Key for a tolerance layer (by naming convention pairs with `Layer::Tolerance`).
Tolerance,
/// Key for a mean layer (by naming convention pairs with `Layer::Mean`).
Mean,
/// Key for an SPM chart layer (by naming convention pairs with `Layer::SpmChart`).
SpmChart,
/// Key for an SPM monitoring layer (by naming convention pairs with `Layer::SpmMonitor`).
SpmMonitor,
/// Key for an explanation layer (by naming convention pairs with `Layer::Explain`).
Explain,
/// Caller-defined key carrying an arbitrary string.
Custom(String),
}
/// An analysis result attached to an `FdaData`; each variant wraps the payload
/// struct for one kind of result.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
// Marked non-exhaustive so new layer kinds can be added without breaking downstream matches.
#[non_exhaustive]
pub enum Layer {
/// Payload returned by `FdaData::fpca`.
Fpca(FpcaLayer),
/// PLS payload.
Pls(PlsLayer),
/// Payload returned by `FdaData::alignment`.
Alignment(AlignmentLayer),
/// Payload returned by `FdaData::distances`.
Distances(DistancesLayer),
/// Payload returned by `FdaData::depth`.
Depth(DepthLayer),
/// Payload returned by `FdaData::outliers`.
Outliers(OutlierLayer),
/// Payload returned by `FdaData::clusters`.
Clusters(ClusterLayer),
/// Payload returned by `FdaData::regression`.
Regression(RegressionLayer),
/// Function-on-scalar regression payload.
FunctionOnScalar(FosrLayer),
/// Tolerance band payload.
Tolerance(ToleranceLayer),
/// Mean curve payload.
Mean(MeanLayer),
/// SPM chart payload.
SpmChart(SpmChartLayer),
/// SPM monitoring payload.
SpmMonitor(SpmMonitorLayer),
/// Explanation payload.
Explain(ExplainLayer),
/// Caller-defined payload.
Custom(CustomLayer),
}
/// Result of a functional principal component analysis.
///
/// The shapes noted below are those asserted by this file's test of the
/// conversion from `FregreLmResult` (n observations, m grid points); other
/// producers are not visible here.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FpcaLayer {
/// Per-component eigenvalues; the `FregreLmResult` conversion computes them
/// as squared singular values divided by `max(n - 1, 1)`.
pub eigenvalues: Vec<f64>,
/// Per-component share of variance; the `FregreLmResult` conversion
/// normalises by the sum of the stored eigenvalues, so the entries sum to 1
/// there (or are all zero when that sum is not positive).
pub variance_explained: Vec<f64>,
/// Component functions; m x ncomp in the tested conversion.
pub eigenfunctions: FdMatrix,
/// Component scores; n x ncomp in the tested conversion.
pub scores: FdMatrix,
/// Mean vector; length m in the tested conversion.
pub mean: Vec<f64>,
/// Weight vector; length m in the tested conversion (presumably quadrature
/// weights over the grid -- confirm against the producer).
pub weights: Vec<f64>,
/// Number of components.
pub ncomp: usize,
}
/// Result of a partial least squares decomposition. The matrix shapes are not
/// established anywhere in this file -- confirm against the producer.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PlsLayer {
/// Weight matrix.
pub weights: FdMatrix,
/// Score matrix.
pub scores: FdMatrix,
/// Loading matrix.
pub loadings: FdMatrix,
/// Number of components.
pub ncomp: usize,
}
/// Result of a curve alignment; can be converted back into a
/// `KarcherMeanResult` with `to_karcher_mean_result`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AlignmentLayer {
/// Aligned curves (mapped to `KarcherMeanResult::aligned_data`).
pub aligned: FdMatrix,
/// Warping functions (mapped to `KarcherMeanResult::gammas`).
pub warps: FdMatrix,
/// Mean curve.
pub mean: Vec<f64>,
/// Mean in SRSF representation (presumably square-root slope function -- confirm).
pub mean_srsf: Vec<f64>,
/// Iteration count, if recorded; treated as 0 by `to_karcher_mean_result` when absent.
pub n_iter: Option<usize>,
/// Convergence flag, if recorded; treated as `true` by `to_karcher_mean_result` when absent.
pub converged: Option<bool>,
}
impl AlignmentLayer {
    /// Rebuilds a `KarcherMeanResult` from the data stored in this layer.
    ///
    /// Every vector and matrix is cloned. Fields the layer may not have
    /// recorded fall back to defaults: a missing `n_iter` becomes `0` and a
    /// missing `converged` becomes `true`. `aligned_srsfs` is not stored in
    /// the layer and is always `None` in the result.
    pub fn to_karcher_mean_result(&self) -> crate::alignment::KarcherMeanResult {
        // Resolve the optional bookkeeping fields up front.
        let n_iter = self.n_iter.unwrap_or(0);
        let converged = self.converged.unwrap_or(true);

        crate::alignment::KarcherMeanResult {
            aligned_data: self.aligned.clone(),
            gammas: self.warps.clone(),
            mean: self.mean.clone(),
            mean_srsf: self.mean_srsf.clone(),
            aligned_srsfs: None,
            n_iter,
            converged,
        }
    }
}
/// A distance matrix together with the name of the method that produced it.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DistancesLayer {
/// Distance matrix; this file's test stores a 10 x 10 matrix for 10 curves
/// (presumably pairwise n x n -- confirm against the producer).
pub dist_mat: FdMatrix,
/// Free-form method name (the test uses "elastic").
pub method: String,
}
/// Depth scores together with the name of the method that produced them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DepthLayer {
/// Depth values; this file's tests store one per curve.
pub scores: Vec<f64>,
/// Free-form method name (the tests use "fraiman_muniz" and "fm").
pub method: String,
}
/// Outlier detection result. Only `flags`, `threshold` and `method` are
/// mandatory; the remaining fields are optional method-specific diagnostics.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct OutlierLayer {
/// Outlier flags; this file's test stores one per curve.
pub flags: Vec<bool>,
/// Threshold associated with the flags (its scale depends on the method -- not visible here).
pub threshold: f64,
/// Free-form method name (the test uses "lrt").
pub method: String,
/// NOTE(review): presumably modified epigraph index values -- confirm.
pub mei: Option<Vec<f64>>,
/// NOTE(review): presumably modified band depth values -- confirm.
pub mbd: Option<Vec<f64>>,
/// Optional magnitude diagnostic values.
pub magnitude: Option<Vec<f64>>,
/// Optional shape diagnostic values.
pub shape: Option<Vec<f64>>,
/// Optional outliergram coefficient `a0` (meaning defined by the producer -- confirm).
pub outliergram_a0: Option<f64>,
/// Optional outliergram coefficient `a1` (meaning defined by the producer -- confirm).
pub outliergram_a1: Option<f64>,
/// Optional outliergram coefficient `a2` (meaning defined by the producer -- confirm).
pub outliergram_a2: Option<f64>,
}
/// Clustering result. The vector lengths and matrix shape are not established
/// in this file -- confirm against the producer.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ClusterLayer {
/// Cluster assignments (presumably one per observation -- confirm).
pub labels: Vec<usize>,
/// Cluster count (presumably the number of clusters requested or found -- confirm).
pub k: usize,
/// Free-form method name.
pub method: String,
/// Optional cluster centres.
pub centers: Option<FdMatrix>,
/// Optional medoid indices (presumably row indices into the curves -- confirm).
pub medoid_indices: Option<Vec<usize>>,
/// Optional silhouette values.
pub silhouette: Option<Vec<f64>>,
}
/// Result of a scalar-response regression on functional data.
///
/// This file provides `From` conversions from `FregreLmResult` and
/// `PlsRegressionResult`. Both leave `observed_y` empty and set `argvals`,
/// `model_name` and `selection_extra` to `None`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RegressionLayer {
/// Method identifier; the conversions in this file use "fregre_lm" and "fregre_pls".
pub method: String,
/// Coefficient function values, if available.
pub beta_t: Option<Vec<f64>>,
/// Fitted response values.
pub fitted_values: Vec<f64>,
/// Residuals.
pub residuals: Vec<f64>,
/// Observed responses; may be empty (the conversions in this file do not fill it).
pub observed_y: Vec<f64>,
/// R-squared of the fit.
pub r_squared: f64,
/// Adjusted R-squared, if available.
pub adj_r_squared: Option<f64>,
/// Intercept of the fit.
pub intercept: f64,
/// Number of components used by the fit.
pub ncomp: usize,
/// Evaluation grid, if recorded.
pub argvals: Option<Vec<f64>>,
/// Standard errors for the coefficients, if available (set only by the
/// `FregreLmResult` conversion in this file).
pub beta_se: Option<Vec<f64>>,
/// Optional display name for the model.
pub model_name: Option<String>,
/// Number of observations, if recorded; the conversions in this file use the
/// length of `fitted_values`.
pub n_obs: Option<usize>,
/// FPCA basis underlying the fit, if any (set only by the `FregreLmResult`
/// conversion in this file).
pub fpca: Option<Box<FpcaLayer>>,
/// Optional free-form extra data; arbitrary JSON when the `serde` feature is enabled.
#[cfg(feature = "serde")]
pub selection_extra: Option<serde_json::Value>,
/// Optional free-form extra data; a map of named `f64` vectors when `serde` is disabled.
#[cfg(not(feature = "serde"))]
pub selection_extra: Option<HashMap<String, Vec<f64>>>,
}
/// Result of a function-on-scalar regression. The matrix shapes are not
/// established in this file -- confirm against the producer.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FosrLayer {
/// Coefficient matrix.
pub coefficients: FdMatrix,
/// Fitted curves.
pub fitted: FdMatrix,
/// R-squared values (presumably one per grid point, per the `_t` suffix -- confirm).
pub r_squared_t: Vec<f64>,
}
/// A tolerance band described by lower, upper and centre vectors
/// (presumably all evaluated on the same grid -- confirm against the producer).
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ToleranceLayer {
/// Lower bound values.
pub lower: Vec<f64>,
/// Upper bound values.
pub upper: Vec<f64>,
/// Centre values.
pub center: Vec<f64>,
/// Free-form method name.
pub method: String,
}
/// A stored mean vector.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MeanLayer {
/// Mean values (presumably one per grid point -- confirm against the producer).
pub mean: Vec<f64>,
}
/// Snapshot of an SPM chart: control limits, the statistics the chart was built
/// from, and optionally the FPCA state.
///
/// `from_chart` fills every optional field. `can_monitor` returns `true` only
/// when all four optional fields are present.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SpmChartLayer {
/// T2 limit; `from_chart` takes it from `chart.t2_limit.ucl`.
pub t2_limit: f64,
/// SPE limit; `from_chart` takes it from `chart.spe_limit.ucl`.
pub spe_limit: f64,
/// T2 statistics; `from_chart` copies `chart.t2_phase1`.
pub t2_stats: Vec<f64>,
/// SPE statistics; `from_chart` copies `chart.spe_phase1`.
pub spe_stats: Vec<f64>,
/// Number of components; `from_chart` uses the number of chart eigenvalues.
pub ncomp: usize,
/// Alpha setting; `from_chart` takes it from `chart.config.alpha`.
pub alpha: f64,
/// Chart eigenvalues, if retained.
pub eigenvalues: Option<Vec<f64>>,
/// FPCA mean, if retained.
pub fpca_mean: Option<Vec<f64>>,
/// FPCA rotation matrix, if retained.
pub fpca_rotation: Option<FdMatrix>,
/// FPCA weights, if retained.
pub fpca_weights: Option<Vec<f64>>,
}
impl SpmChartLayer {
    /// Captures an `SpmChart` as a layer.
    ///
    /// Copies the upper control limits, the phase-1 statistics, the alpha
    /// setting, the eigenvalues and the FPCA mean, rotation and weights.
    /// `ncomp` is set to the number of eigenvalues on the chart. All optional
    /// fields are populated, so `can_monitor` is `true` for the result.
    pub fn from_chart(chart: &crate::spm::SpmChart) -> Self {
        let fpca = &chart.fpca;
        let ncomp = chart.eigenvalues.len();

        Self {
            ncomp,
            alpha: chart.config.alpha,
            t2_limit: chart.t2_limit.ucl,
            spe_limit: chart.spe_limit.ucl,
            t2_stats: chart.t2_phase1.clone(),
            spe_stats: chart.spe_phase1.clone(),
            eigenvalues: Some(chart.eigenvalues.clone()),
            fpca_mean: Some(fpca.mean.clone()),
            fpca_rotation: Some(fpca.rotation.clone()),
            fpca_weights: Some(fpca.weights.clone()),
        }
    }

    /// Returns `true` when the eigenvalues and the FPCA mean, rotation and
    /// weights are all present.
    pub fn can_monitor(&self) -> bool {
        matches!(
            (
                &self.eigenvalues,
                &self.fpca_mean,
                &self.fpca_rotation,
                &self.fpca_weights,
            ),
            (Some(_), Some(_), Some(_), Some(_))
        )
    }
}
/// Result of monitoring with an SPM chart: statistics, limits and alarm flags.
/// How the alarms are derived is not visible in this file -- confirm against
/// the producer.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SpmMonitorLayer {
/// T2 statistics.
pub t2_stats: Vec<f64>,
/// SPE statistics.
pub spe_stats: Vec<f64>,
/// T2 limit.
pub t2_limit: f64,
/// SPE limit.
pub spe_limit: f64,
/// T2 alarm flags (presumably parallel to `t2_stats` -- confirm).
pub t2_alarms: Vec<bool>,
/// SPE alarm flags (presumably parallel to `spe_stats` -- confirm).
pub spe_alarms: Vec<bool>,
}
/// Payload type of `ExplainLayer::extra` when the `serde` feature is enabled:
/// an arbitrary JSON value.
#[cfg(feature = "serde")]
pub type ExplainExtra = serde_json::Value;
/// Payload type of `ExplainLayer::extra` when the `serde` feature is disabled:
/// a map of named `f64` vectors.
#[cfg(not(feature = "serde"))]
pub type ExplainExtra = HashMap<String, Vec<f64>>;
/// Output of an explanation method.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ExplainLayer {
/// Free-form method name.
pub method: String,
/// Explanation values.
pub values: Vec<f64>,
/// Labels (presumably parallel to `values` -- confirm against the producer).
pub labels: Vec<String>,
/// Optional method-specific extra data; its type depends on the `serde` feature.
pub extra: Option<ExplainExtra>,
}
/// Payload type of `CustomLayer::data` when the `serde` feature is enabled:
/// an arbitrary JSON value.
#[cfg(feature = "serde")]
pub type CustomData = serde_json::Value;
/// Payload type of `CustomLayer::data` when the `serde` feature is disabled:
/// a map of named `f64` vectors.
#[cfg(not(feature = "serde"))]
pub type CustomData = HashMap<String, Vec<f64>>;
/// Caller-defined layer: a name plus a free-form payload.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CustomLayer {
/// Name of the custom layer.
pub name: String,
/// Payload; its type depends on the `serde` feature.
pub data: CustomData,
}
impl FdaData {
    /// Creates a container holding functional curves and their evaluation grid.
    ///
    /// Every other component starts empty. No check is made that `argvals`
    /// agrees with the dimensions of `curves`.
    pub fn from_curves(curves: FdMatrix, argvals: Vec<f64>) -> Self {
        Self {
            curves: Some(curves),
            argvals: Some(argvals),
            ..Self::empty()
        }
    }

    /// Creates a container holding tabular data and its column names.
    ///
    /// Every other component starts empty. No check is made that
    /// `column_names` agrees with the dimensions of `tabular`.
    pub fn from_tabular(tabular: FdMatrix, column_names: Vec<String>) -> Self {
        Self {
            tabular: Some(tabular),
            column_names: Some(column_names),
            ..Self::empty()
        }
    }

    /// Creates a container with no data, no metadata and no layers.
    pub fn empty() -> Self {
        Self {
            curves: None,
            argvals: None,
            grouping: Vec::new(),
            scalar_vars: Vec::new(),
            tabular: None,
            column_names: None,
            layers: HashMap::new(),
        }
    }

    /// Returns the curves and their evaluation grid.
    ///
    /// # Errors
    ///
    /// Returns an error message when either `curves` or `argvals` is `None`.
    pub fn require_curves(&self) -> Result<(&FdMatrix, &[f64]), String> {
        match (&self.curves, &self.argvals) {
            (Some(c), Some(a)) => Ok((c, a)),
            _ => Err("FdaData requires functional curves + argvals".into()),
        }
    }

    /// Returns the layer stored under `key`.
    ///
    /// # Errors
    ///
    /// Returns an error message naming the key when no such layer is stored.
    pub fn require_layer(&self, key: &LayerKey) -> Result<&Layer, String> {
        self.layers
            .get(key)
            .ok_or_else(|| format!("FdaData missing required layer: {key:?}"))
    }

    /// Returns `true` when a layer is stored under `key`.
    pub fn has_layer(&self, key: &LayerKey) -> bool {
        self.layers.contains_key(key)
    }

    /// Returns the layer stored under `key`, if any.
    pub fn get_layer(&self, key: &LayerKey) -> Option<&Layer> {
        self.layers.get(key)
    }

    /// Stores `layer` under `key`, replacing any layer previously stored there.
    ///
    /// The key and the layer variant are not checked against each other. A
    /// mismatched pair is stored as given, and the typed accessors (`fpca`,
    /// `depth`, ...) will then return `None` for it.
    pub fn set_layer(&mut self, key: LayerKey, layer: Layer) {
        self.layers.insert(key, layer);
    }

    /// Removes and returns the layer stored under `key`, if any.
    pub fn remove_layer(&mut self, key: &LayerKey) -> Option<Layer> {
        self.layers.remove(key)
    }

    /// Returns the keys of all stored layers, in the map's unspecified order.
    pub fn layer_keys(&self) -> Vec<&LayerKey> {
        self.layers.keys().collect()
    }

    /// Returns the FPCA layer when one is stored under `LayerKey::Fpca`.
    pub fn fpca(&self) -> Option<&FpcaLayer> {
        match self.layers.get(&LayerKey::Fpca) {
            Some(Layer::Fpca(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the distances layer when one is stored under `LayerKey::Distances`.
    pub fn distances(&self) -> Option<&DistancesLayer> {
        match self.layers.get(&LayerKey::Distances) {
            Some(Layer::Distances(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the alignment layer when one is stored under `LayerKey::Alignment`.
    pub fn alignment(&self) -> Option<&AlignmentLayer> {
        match self.layers.get(&LayerKey::Alignment) {
            Some(Layer::Alignment(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the regression layer when one is stored under `LayerKey::Regression`.
    pub fn regression(&self) -> Option<&RegressionLayer> {
        match self.layers.get(&LayerKey::Regression) {
            Some(Layer::Regression(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the cluster layer when one is stored under `LayerKey::Clusters`.
    pub fn clusters(&self) -> Option<&ClusterLayer> {
        match self.layers.get(&LayerKey::Clusters) {
            Some(Layer::Clusters(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the depth layer when one is stored under `LayerKey::Depth`.
    pub fn depth(&self) -> Option<&DepthLayer> {
        match self.layers.get(&LayerKey::Depth) {
            Some(Layer::Depth(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the outlier layer when one is stored under `LayerKey::Outliers`.
    pub fn outliers(&self) -> Option<&OutlierLayer> {
        match self.layers.get(&LayerKey::Outliers) {
            Some(Layer::Outliers(layer)) => Some(layer),
            _ => None,
        }
    }

    /// Returns the number of observations.
    ///
    /// Uses the first source available, in this order: the row count of
    /// `curves`, the row count of `tabular`, the length of the first scalar
    /// variable. Returns `0` when none of them is present.
    pub fn n_obs(&self) -> usize {
        match self.curves.as_ref().or(self.tabular.as_ref()) {
            Some(matrix) => matrix.nrows(),
            None => self.scalar_vars.first().map_or(0, |v| v.values.len()),
        }
    }

    /// Returns the length of `argvals`, or `0` when no grid is stored.
    pub fn n_points(&self) -> usize {
        self.argvals.as_ref().map_or(0, Vec::len)
    }

    /// Appends a named scalar variable.
    ///
    /// Names are not de-duplicated; `get_scalar` returns the first match.
    pub fn add_scalar(&mut self, name: impl Into<String>, values: Vec<f64>) {
        self.scalar_vars.push(NamedVec {
            name: name.into(),
            values,
        });
    }

    /// Returns the values of the first scalar variable called `name`, if any.
    pub fn get_scalar(&self, name: &str) -> Option<&[f64]> {
        self.scalar_vars
            .iter()
            .find(|v| v.name == name)
            .map(|v| v.values.as_slice())
    }

    /// Appends a grouping variable and records its distinct labels.
    ///
    /// `unique` lists each distinct label once, in order of first appearance
    /// in `labels`. Names are not de-duplicated; `get_grouping` returns the
    /// first match.
    pub fn add_grouping(&mut self, name: impl Into<String>, labels: Vec<String>) {
        let mut unique = Vec::new();
        {
            // Track seen labels in a set so de-duplication is linear in the
            // number of labels rather than quadratic in the number of distinct
            // ones. The set borrows from `labels`, so it is scoped to end
            // before `labels` is moved into the `GroupVar` below.
            let mut seen = std::collections::HashSet::new();
            for lab in &labels {
                if seen.insert(lab.as_str()) {
                    unique.push(lab.clone());
                }
            }
        }
        self.grouping.push(GroupVar {
            name: name.into(),
            labels,
            unique,
        });
    }

    /// Returns the first grouping variable called `name`, if any.
    pub fn get_grouping(&self, name: &str) -> Option<&GroupVar> {
        self.grouping.iter().find(|g| g.name == name)
    }
}
impl From<&crate::scalar_on_function::FregreLmResult> for RegressionLayer {
    /// Builds a regression layer from an `fregre_lm` fit.
    ///
    /// The fit's FPCA is embedded as an `FpcaLayer`. Its eigenvalues are the
    /// squared singular values divided by `max(n - 1, 1)`, where `n` is the
    /// number of rows of the FPCA scores. `variance_explained` is each
    /// eigenvalue's share of their sum, or all zeros when that sum is not
    /// positive.
    ///
    /// `observed_y` is left empty and `argvals`, `model_name` and
    /// `selection_extra` are `None`. `n_obs` is the number of fitted values.
    fn from(fit: &crate::scalar_on_function::FregreLmResult) -> Self {
        let pca = &fit.fpca;

        // Sample-variance denominator, floored at 1 so a single-row fit does
        // not divide by zero.
        let denom = (pca.scores.nrows() as f64 - 1.0).max(1.0);
        let eigenvalues: Vec<f64> = pca
            .singular_values
            .iter()
            .map(|sv| sv * sv / denom)
            .collect();

        let total_var: f64 = eigenvalues.iter().sum();
        let variance_explained: Vec<f64> = if total_var > 0.0 {
            eigenvalues.iter().map(|ev| ev / total_var).collect()
        } else {
            vec![0.0; eigenvalues.len()]
        };

        let fpca = Some(Box::new(FpcaLayer {
            ncomp: fit.ncomp,
            mean: pca.mean.clone(),
            weights: pca.weights.clone(),
            scores: pca.scores.clone(),
            eigenfunctions: pca.rotation.clone(),
            eigenvalues,
            variance_explained,
        }));

        RegressionLayer {
            method: "fregre_lm".into(),
            model_name: None,
            ncomp: fit.ncomp,
            n_obs: Some(fit.fitted_values.len()),
            intercept: fit.intercept,
            beta_t: Some(fit.beta_t.clone()),
            beta_se: Some(fit.beta_se.clone()),
            argvals: None,
            fitted_values: fit.fitted_values.clone(),
            residuals: fit.residuals.clone(),
            observed_y: Vec::new(),
            r_squared: fit.r_squared,
            adj_r_squared: Some(fit.r_squared_adj),
            fpca,
            selection_extra: None,
        }
    }
}
impl From<&crate::scalar_on_function::PlsRegressionResult> for RegressionLayer {
    /// Builds a regression layer from an `fregre_pls` fit.
    ///
    /// No FPCA basis or coefficient standard errors are attached. `observed_y`
    /// is left empty and `argvals`, `model_name` and `selection_extra` are
    /// `None`. `n_obs` is the number of fitted values.
    fn from(fit: &crate::scalar_on_function::PlsRegressionResult) -> Self {
        let n_fitted = fit.fitted_values.len();

        RegressionLayer {
            method: "fregre_pls".into(),
            model_name: None,
            ncomp: fit.ncomp,
            n_obs: Some(n_fitted),
            intercept: fit.intercept,
            beta_t: Some(fit.beta_t.clone()),
            beta_se: None,
            argvals: None,
            fitted_values: fit.fitted_values.clone(),
            residuals: fit.residuals.clone(),
            observed_y: Vec::new(),
            r_squared: fit.r_squared,
            adj_r_squared: Some(fit.r_squared_adj),
            fpca: None,
            selection_extra: None,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Evenly spaced grid of `m` points from 0 to 1 inclusive (needs `m >= 2`).
    fn unit_grid(m: usize) -> Vec<f64> {
        let last = (m - 1) as f64;
        (0..m).map(|j| j as f64 / last).collect()
    }

    /// A curves-backed container reports its dimensions and starts without layers.
    #[test]
    fn from_curves_basic() {
        let (n_curves, n_grid) = (10, 50);
        let fd = FdaData::from_curves(FdMatrix::zeros(n_curves, n_grid), unit_grid(n_grid));

        assert_eq!(fd.n_obs(), n_curves);
        assert_eq!(fd.n_points(), n_grid);
        assert!(fd.require_curves().is_ok());
        assert!(!fd.has_layer(&LayerKey::Fpca));
    }

    /// A stored layer is visible through the generic and the typed accessors.
    #[test]
    fn add_and_retrieve_layers() {
        let mut fd = FdaData::from_curves(FdMatrix::zeros(5, 20), unit_grid(20));

        let depth = DepthLayer {
            scores: vec![0.5; 5],
            method: "fraiman_muniz".into(),
        };
        fd.set_layer(LayerKey::Depth, Layer::Depth(depth));

        assert!(fd.has_layer(&LayerKey::Depth));
        assert!(!fd.has_layer(&LayerKey::Fpca));

        let stored = fd.depth();
        assert!(stored.is_some());
        assert_eq!(stored.unwrap().scores.len(), 5);
        assert_eq!(fd.layer_keys().len(), 1);
    }

    /// Requiring a layer that was never stored is an error.
    #[test]
    fn require_missing_layer_errors() {
        let fd = FdaData::from_curves(FdMatrix::zeros(3, 10), vec![0.0; 10]);
        let missing = fd.require_layer(&LayerKey::Fpca);
        assert!(missing.is_err());
    }

    /// Scalar variables can be looked up by name and drive `n_obs` when no
    /// matrix is present.
    #[test]
    fn scalar_vars() {
        let heights = vec![170.0, 180.0, 165.0];
        let mut fd = FdaData::empty();
        fd.add_scalar("height", heights);

        assert_eq!(fd.get_scalar("height").unwrap(), &[170.0, 180.0, 165.0]);
        assert!(fd.get_scalar("weight").is_none());
        assert_eq!(fd.n_obs(), 3);
    }

    /// Layers of different kinds coexist in one container.
    #[test]
    fn multiple_layers_compose() {
        let mut fd = FdaData::from_curves(FdMatrix::zeros(10, 30), vec![0.0; 30]);

        let depth = DepthLayer {
            scores: vec![0.5; 10],
            method: "fm".into(),
        };
        let outliers = OutlierLayer {
            flags: vec![false; 10],
            threshold: 0.1,
            method: "lrt".into(),
            mei: None,
            mbd: None,
            magnitude: None,
            shape: None,
            outliergram_a0: None,
            outliergram_a1: None,
            outliergram_a2: None,
        };
        let distances = DistancesLayer {
            dist_mat: FdMatrix::zeros(10, 10),
            method: "elastic".into(),
        };

        fd.set_layer(LayerKey::Depth, Layer::Depth(depth));
        fd.set_layer(LayerKey::Outliers, Layer::Outliers(outliers));
        fd.set_layer(LayerKey::Distances, Layer::Distances(distances));

        assert_eq!(fd.layer_keys().len(), 3);
        assert!(fd.depth().is_some());
        assert!(fd.outliers().is_some());
        assert!(fd.distances().is_some());
    }

    /// Converting an `fregre_lm` fit carries over the fit and its FPCA basis.
    #[test]
    fn regression_layer_from_fregre_lm() {
        let (n, m) = (20, 30);

        // Column-major fill: flat index k maps to row k % n, column k / n.
        let values: Vec<f64> = (0..n * m)
            .map(|k| {
                let row = (k % n) as f64;
                let col = (k / n) as f64;
                ((row + 1.0) * col * 0.2).sin()
            })
            .collect();
        let data = FdMatrix::from_column_major(values, n, m).unwrap();
        let y: Vec<f64> = (0..n).map(|i| (i as f64 * 0.5).sin()).collect();

        let fit = crate::scalar_on_function::fregre_lm(&data, &y, None, 3).unwrap();
        let layer = RegressionLayer::from(&fit);

        assert_eq!(layer.method, "fregre_lm");
        assert_eq!(layer.ncomp, 3);
        assert_eq!(layer.fitted_values.len(), n);
        assert_eq!(layer.residuals.len(), n);

        assert!(layer.fpca.is_some());
        let fpca = layer.fpca.as_ref().unwrap();
        assert_eq!(fpca.ncomp, 3);
        assert_eq!(fpca.mean.len(), m);
        assert_eq!(fpca.eigenfunctions.shape(), (m, 3));
        assert_eq!(fpca.scores.shape(), (n, 3));
        assert_eq!(fpca.weights.len(), m);
        assert_eq!(fpca.eigenvalues.len(), 3);

        let ve_sum: f64 = fpca.variance_explained.iter().sum();
        assert!(
            (ve_sum - 1.0).abs() < 1e-10,
            "variance_explained sum = {ve_sum}"
        );

        assert!(layer.beta_t.is_some());
        assert!(layer.beta_se.is_some());
        assert_eq!(layer.n_obs, Some(n));
        assert!((layer.r_squared - fit.r_squared).abs() < 1e-14);
    }

    /// Converting an `fregre_pls` fit carries over the fit without FPCA or
    /// standard errors.
    #[test]
    fn regression_layer_from_pls() {
        let n = 30;
        let m = 50;
        let t = unit_grid(m);

        let mut vals: Vec<f64> = Vec::with_capacity(n * m);
        for i in 0..n {
            for &tj in &t {
                vals.push((2.0 * std::f64::consts::PI * tj).sin() + 0.1 * i as f64);
            }
        }
        let data = FdMatrix::from_column_major(vals, n, m).unwrap();
        let y: Vec<f64> = (0..n).map(|i| 2.0 + 0.5 * i as f64).collect();

        let fit = crate::scalar_on_function::fregre_pls(&data, &y, &t, 3, None).unwrap();
        let layer = RegressionLayer::from(&fit);

        assert_eq!(layer.method, "fregre_pls");
        assert_eq!(layer.ncomp, 3);
        assert_eq!(layer.fitted_values.len(), n);
        assert!(layer.fpca.is_none());
        assert!(layer.beta_t.is_some());
        assert!(layer.beta_se.is_none());
        assert_eq!(layer.n_obs, Some(n));
        assert!((layer.r_squared - fit.r_squared).abs() < 1e-14);
    }
}