Skip to main content

fdars_core/
wire.rs

1//! Unified FDA data container for pipeline interchange.
2//!
3//! [`FdaData`] is a single layered container that flows between pipeline nodes.
4//! Nodes read from existing layers and add new ones — data is additive, never
5//! destructive. This replaces per-type wire enums with a composable structure.
6//!
7//! # Design
8//!
9//! - **Core**: curves (FdMatrix) + argvals + metadata (grouping, scalars)
10//! - **Layers**: optional analysis results keyed by [`LayerKey`]
11//! - Nodes declare what they *require* via `require_*` helpers
12//! - Nodes add results via `set_layer`
13//! - Layers compose: FPCA + Depth + Outliers can all coexist on one `FdaData`
14//!
15//! # Example
16//!
17//! ```
18//! use fdars_core::wire::*;
19//! use fdars_core::matrix::FdMatrix;
20//!
21//! let mut fd = FdaData::from_curves(
22//!     FdMatrix::zeros(10, 50),
23//!     (0..50).map(|i| i as f64 / 49.0).collect(),
24//! );
25//!
26//! // A depth node reads curves, adds a Depth layer
27//! let scores = vec![0.5; 10];
28//! fd.set_layer(LayerKey::Depth, Layer::Depth(DepthLayer {
29//!     scores,
30//!     method: "fraiman_muniz".into(),
31//! }));
32//!
33//! // Downstream node checks what's available
34//! assert!(fd.has_layer(&LayerKey::Depth));
35//! assert!(!fd.has_layer(&LayerKey::Fpca));
36//! ```
37
38use crate::matrix::FdMatrix;
39use std::collections::HashMap;
40
41// ─── Core Container ─────────────────────────────────────────────────────────
42
43/// Unified FDA data object for pipeline interchange.
44///
45/// Carries functional data (curves + domain) plus optional analysis layers.
46/// Nodes read what they need and add their results as new layers.
47#[derive(Debug, Clone)]
48#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
49pub struct FdaData {
50    // ── Core functional data ──
51    /// Functional observations (n × m). `None` for tabular-only data.
52    pub curves: Option<FdMatrix>,
53    /// Evaluation grid (length m).
54    pub argvals: Option<Vec<f64>>,
55
56    // ── Metadata ──
57    /// Group labels per observation (length n).
58    pub grouping: Option<Vec<usize>>,
59    /// Group names (index → label).
60    pub group_names: Option<Vec<String>>,
61    /// Named scalar variables (each length n).
62    pub scalar_vars: Vec<NamedVec>,
63    /// Tabular data for non-functional variables (n × p).
64    pub tabular: Option<FdMatrix>,
65    /// Column names for tabular data.
66    pub column_names: Option<Vec<String>>,
67
68    // ── Analysis layers ──
69    /// Analysis results keyed by layer type.
70    pub layers: HashMap<LayerKey, Layer>,
71}
72
/// A named vector of f64 values (e.g., a scalar covariate or response).
///
/// Two `NamedVec`s compare equal when both the name and every value match.
/// Because the values are `f64`, a vector containing NaN never compares equal,
/// not even to a clone of itself.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NamedVec {
    /// Variable name used for lookup.
    pub name: String,
    /// The variable's values.
    pub values: Vec<f64>,
}
80
81// ─── Layer Keys & Types ─────────────────────────────────────────────────────
82
/// Identifies which kind of analysis layer an entry in `FdaData::layers` holds.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a
/// wildcard arm.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum LayerKey {
    /// Functional principal component analysis (FPCA) decomposition.
    Fpca,
    /// Partial least squares (PLS) decomposition.
    Pls,
    /// Elastic alignment: Karcher mean plus warping functions.
    Alignment,
    /// Precomputed n×n distance matrix.
    Distances,
    /// Functional depth scores.
    Depth,
    /// Outlier detection flags.
    Outliers,
    /// Cluster assignments.
    Clusters,
    /// Scalar-on-function regression fit.
    Regression,
    /// Function-on-scalar regression fit.
    FunctionOnScalar,
    /// Tolerance or confidence bands.
    Tolerance,
    /// Mean curve.
    Mean,
    /// SPM Phase I chart.
    SpmChart,
    /// SPM Phase II monitoring result.
    SpmMonitor,
    /// Explainability result (SHAP, PDP, etc.).
    Explain,
    /// User-defined extension, distinguished by its name.
    Custom(String),
}
119
120/// Analysis result attached to an [`FdaData`].
121#[derive(Debug, Clone)]
122#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
123#[non_exhaustive]
124pub enum Layer {
125    Fpca(FpcaLayer),
126    Pls(PlsLayer),
127    Alignment(AlignmentLayer),
128    Distances(DistancesLayer),
129    Depth(DepthLayer),
130    Outliers(OutlierLayer),
131    Clusters(ClusterLayer),
132    Regression(RegressionLayer),
133    FunctionOnScalar(FosrLayer),
134    Tolerance(ToleranceLayer),
135    Mean(MeanLayer),
136    SpmChart(SpmChartLayer),
137    SpmMonitor(SpmMonitorLayer),
138    Explain(ExplainLayer),
139    Custom(CustomLayer),
140}
141
142// ─── Layer Structs ──────────────────────────────────────────────────────────
143
144/// FPCA decomposition.
145#[derive(Debug, Clone)]
146#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
147pub struct FpcaLayer {
148    pub eigenvalues: Vec<f64>,
149    pub variance_explained: Vec<f64>,
150    /// Eigenfunctions (m × ncomp), each column is one eigenfunction.
151    pub eigenfunctions: FdMatrix,
152    /// Scores (n × ncomp).
153    pub scores: FdMatrix,
154    /// Mean function (length m).
155    pub mean: Vec<f64>,
156    /// Integration weights (length m).
157    pub weights: Vec<f64>,
158    pub ncomp: usize,
159}
160
161/// PLS decomposition.
162#[derive(Debug, Clone)]
163#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
164pub struct PlsLayer {
165    /// Weight vectors (m × ncomp).
166    pub weights: FdMatrix,
167    /// Scores (n × ncomp).
168    pub scores: FdMatrix,
169    /// Loadings (m × ncomp).
170    pub loadings: FdMatrix,
171    pub ncomp: usize,
172}
173
174/// Elastic alignment result.
175#[derive(Debug, Clone)]
176#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
177pub struct AlignmentLayer {
178    /// Aligned curves (n × m).
179    pub aligned: FdMatrix,
180    /// Warping functions (n × m).
181    pub warps: FdMatrix,
182    /// Karcher mean (length m).
183    pub mean: Vec<f64>,
184    /// Mean SRSF (length m).
185    pub mean_srsf: Vec<f64>,
186}
187
188/// Precomputed n×n distance matrix with method metadata.
189#[derive(Debug, Clone)]
190#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
191pub struct DistancesLayer {
192    /// Symmetric n×n distance matrix.
193    pub dist_mat: FdMatrix,
194    /// Distance method used (e.g., "elastic", "l2", "dtw", "amplitude", "phase").
195    pub method: String,
196}
197
/// Functional depth scores for a set of observations.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DepthLayer {
    /// One depth score per observation (length n).
    pub scores: Vec<f64>,
    /// Name of the depth method.
    pub method: String,
}
207
/// Result of an outlier detection step.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct OutlierLayer {
    /// One outlier flag per observation (length n).
    pub flags: Vec<bool>,
    /// Threshold used by the detection method.
    pub threshold: f64,
    /// Name of the detection method.
    pub method: String,
    /// MEI scores, when available (used by the outliergram).
    pub mei: Option<Vec<f64>>,
    /// MBD scores, when available (used by the outliergram).
    pub mbd: Option<Vec<f64>>,
    /// Magnitude outlyingness, when available.
    pub magnitude: Option<Vec<f64>>,
    /// Shape outlyingness, when available.
    pub shape: Option<Vec<f64>>,
}
227
228/// Cluster assignments.
229#[derive(Debug, Clone)]
230#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
231pub struct ClusterLayer {
232    /// Cluster label per observation (0-indexed, length n).
233    pub labels: Vec<usize>,
234    /// Number of clusters.
235    pub k: usize,
236    /// Method name.
237    pub method: String,
238    /// Optional: cluster centers (k rows × m cols).
239    pub centers: Option<FdMatrix>,
240    /// Optional: medoid indices (length k).
241    pub medoid_indices: Option<Vec<usize>>,
242    /// Optional: silhouette scores (length n).
243    pub silhouette: Option<Vec<f64>>,
244}
245
/// Fit of a scalar-on-function regression model.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RegressionLayer {
    /// Name of the fitting method (e.g., "fregre_lm", "fregre_pls", "fregre_np", "elastic").
    pub method: String,
    /// Functional coefficient β(t) on the grid (length m); `None` for
    /// nonparametric fits.
    pub beta_t: Option<Vec<f64>>,
    /// Fitted response values (length n).
    pub fitted_values: Vec<f64>,
    /// Residuals (length n).
    pub residuals: Vec<f64>,
    /// Observed response values (length n).
    pub observed_y: Vec<f64>,
    /// Coefficient of determination, R².
    pub r_squared: f64,
    /// Adjusted R², when available.
    pub adj_r_squared: Option<f64>,
    /// Model intercept.
    pub intercept: f64,
    /// Number of components used (0 for nonparametric fits).
    pub ncomp: usize,
    /// Grid on which β(t) is evaluated, when available.
    pub argvals: Option<Vec<f64>>,
    /// Pointwise standard errors of β(t), when available.
    pub beta_se: Option<Vec<f64>>,
}
273
274/// Function-on-scalar regression fit.
275#[derive(Debug, Clone)]
276#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
277pub struct FosrLayer {
278    /// Coefficient functions (p × m), one per predictor.
279    pub coefficients: FdMatrix,
280    /// Fitted curves (n × m).
281    pub fitted: FdMatrix,
282    /// R² per grid point (length m).
283    pub r_squared_t: Vec<f64>,
284}
285
/// A tolerance or confidence band on the evaluation grid.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ToleranceLayer {
    /// Lower edge of the band (length m).
    pub lower: Vec<f64>,
    /// Upper edge of the band (length m).
    pub upper: Vec<f64>,
    /// Center of the band (length m).
    pub center: Vec<f64>,
    /// Name of the method that produced the band.
    pub method: String,
}
299
/// The mean curve of a set of functional observations.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MeanLayer {
    /// Mean function on the grid (length m).
    pub mean: Vec<f64>,
}
307
/// SPM Phase I control chart.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SpmChartLayer {
    /// Control limit for the T² statistic.
    pub t2_limit: f64,
    /// Control limit for the SPE statistic.
    pub spe_limit: f64,
    /// T² statistics from Phase I.
    pub t2_stats: Vec<f64>,
    /// SPE statistics from Phase I.
    pub spe_stats: Vec<f64>,
    /// Number of FPC components.
    pub ncomp: usize,
    /// Significance level.
    pub alpha: f64,
}
325
/// SPM Phase II monitoring result for new observations.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SpmMonitorLayer {
    /// T² statistic for each new observation.
    pub t2_stats: Vec<f64>,
    /// SPE statistic for each new observation.
    pub spe_stats: Vec<f64>,
    /// Control limit for the T² statistic.
    pub t2_limit: f64,
    /// Control limit for the SPE statistic.
    pub spe_limit: f64,
    /// Alarm flags for the T² statistic.
    pub t2_alarms: Vec<bool>,
    /// Alarm flags for the SPE statistic.
    pub spe_alarms: Vec<bool>,
}
343
/// Result of an explainability method.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ExplainLayer {
    /// Name of the method (e.g., "shap", "pdp", "ale", "permutation_importance").
    pub method: String,
    /// Result values; their interpretation depends on `method`.
    pub values: Vec<f64>,
    /// Labels for `values`.
    pub labels: Vec<String>,
    /// Further method-specific data, keyed by name.
    pub extra: Option<HashMap<String, Vec<f64>>>,
}
357
/// Free-form layer for user-defined extensions.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CustomLayer {
    /// Name of the extension.
    pub name: String,
    /// Named numeric vectors carried by the extension.
    pub data: HashMap<String, Vec<f64>>,
}
365
366// ─── FdaData Constructors ───────────────────────────────────────────────────
367
368impl FdaData {
369    /// Create from functional curves + grid.
370    pub fn from_curves(curves: FdMatrix, argvals: Vec<f64>) -> Self {
371        Self {
372            curves: Some(curves),
373            argvals: Some(argvals),
374            grouping: None,
375            group_names: None,
376            scalar_vars: Vec::new(),
377            tabular: None,
378            column_names: None,
379            layers: HashMap::new(),
380        }
381    }
382
383    /// Create from tabular (non-functional) data.
384    pub fn from_tabular(tabular: FdMatrix, column_names: Vec<String>) -> Self {
385        Self {
386            curves: None,
387            argvals: None,
388            grouping: None,
389            group_names: None,
390            scalar_vars: Vec::new(),
391            tabular: Some(tabular),
392            column_names: Some(column_names),
393            layers: HashMap::new(),
394        }
395    }
396
397    /// Create empty container.
398    pub fn empty() -> Self {
399        Self {
400            curves: None,
401            argvals: None,
402            grouping: None,
403            group_names: None,
404            scalar_vars: Vec::new(),
405            tabular: None,
406            column_names: None,
407            layers: HashMap::new(),
408        }
409    }
410
411    // ── Requirement checks ──
412
413    /// Require functional curves to be present.
414    pub fn require_curves(&self) -> Result<(&FdMatrix, &[f64]), String> {
415        match (&self.curves, &self.argvals) {
416            (Some(c), Some(a)) => Ok((c, a)),
417            _ => Err("FdaData requires functional curves + argvals".into()),
418        }
419    }
420
421    /// Require a specific layer to be present.
422    pub fn require_layer(&self, key: &LayerKey) -> Result<&Layer, String> {
423        self.layers
424            .get(key)
425            .ok_or_else(|| format!("FdaData missing required layer: {key:?}"))
426    }
427
428    // ── Layer access ──
429
430    /// Check if a layer is present.
431    pub fn has_layer(&self, key: &LayerKey) -> bool {
432        self.layers.contains_key(key)
433    }
434
435    /// Get a layer by key.
436    pub fn get_layer(&self, key: &LayerKey) -> Option<&Layer> {
437        self.layers.get(key)
438    }
439
440    /// Set (add or replace) a layer.
441    pub fn set_layer(&mut self, key: LayerKey, layer: Layer) {
442        self.layers.insert(key, layer);
443    }
444
445    /// Remove a layer.
446    pub fn remove_layer(&mut self, key: &LayerKey) -> Option<Layer> {
447        self.layers.remove(key)
448    }
449
450    /// List all layer keys present.
451    pub fn layer_keys(&self) -> Vec<&LayerKey> {
452        self.layers.keys().collect()
453    }
454
455    // ── Typed layer accessors ──
456
457    /// Get FPCA layer if present.
458    pub fn fpca(&self) -> Option<&FpcaLayer> {
459        match self.layers.get(&LayerKey::Fpca)? {
460            Layer::Fpca(l) => Some(l),
461            _ => None,
462        }
463    }
464
465    /// Get distances layer if present.
466    pub fn distances(&self) -> Option<&DistancesLayer> {
467        match self.layers.get(&LayerKey::Distances)? {
468            Layer::Distances(l) => Some(l),
469            _ => None,
470        }
471    }
472
473    /// Get alignment layer if present.
474    pub fn alignment(&self) -> Option<&AlignmentLayer> {
475        match self.layers.get(&LayerKey::Alignment)? {
476            Layer::Alignment(l) => Some(l),
477            _ => None,
478        }
479    }
480
481    /// Get regression layer if present.
482    pub fn regression(&self) -> Option<&RegressionLayer> {
483        match self.layers.get(&LayerKey::Regression)? {
484            Layer::Regression(l) => Some(l),
485            _ => None,
486        }
487    }
488
489    /// Get cluster layer if present.
490    pub fn clusters(&self) -> Option<&ClusterLayer> {
491        match self.layers.get(&LayerKey::Clusters)? {
492            Layer::Clusters(l) => Some(l),
493            _ => None,
494        }
495    }
496
497    /// Get depth layer if present.
498    pub fn depth(&self) -> Option<&DepthLayer> {
499        match self.layers.get(&LayerKey::Depth)? {
500            Layer::Depth(l) => Some(l),
501            _ => None,
502        }
503    }
504
505    /// Get outlier layer if present.
506    pub fn outliers(&self) -> Option<&OutlierLayer> {
507        match self.layers.get(&LayerKey::Outliers)? {
508            Layer::Outliers(l) => Some(l),
509            _ => None,
510        }
511    }
512
513    // ── Metadata helpers ──
514
515    /// Number of observations (from curves, tabular, or first scalar var).
516    pub fn n_obs(&self) -> usize {
517        if let Some(c) = &self.curves {
518            return c.nrows();
519        }
520        if let Some(t) = &self.tabular {
521            return t.nrows();
522        }
523        self.scalar_vars.first().map_or(0, |v| v.values.len())
524    }
525
526    /// Number of grid points (0 if no functional data).
527    pub fn n_points(&self) -> usize {
528        self.argvals.as_ref().map_or(0, |a| a.len())
529    }
530
531    /// Add a scalar variable.
532    pub fn add_scalar(&mut self, name: impl Into<String>, values: Vec<f64>) {
533        self.scalar_vars.push(NamedVec {
534            name: name.into(),
535            values,
536        });
537    }
538
539    /// Get a scalar variable by name.
540    pub fn get_scalar(&self, name: &str) -> Option<&[f64]> {
541        self.scalar_vars
542            .iter()
543            .find(|v| v.name == name)
544            .map(|v| v.values.as_slice())
545    }
546}
547
548// ─── Tests ──────────────────────────────────────────────────────────────────
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Evenly spaced grid of `m` points on [0, 1] (requires `m >= 2`).
    fn unit_grid(m: usize) -> Vec<f64> {
        let last = (m - 1) as f64;
        (0..m).map(|i| i as f64 / last).collect()
    }

    /// Depth layer with a constant score of 0.5 for `n` observations.
    fn constant_depth(n: usize, method: &str) -> Layer {
        Layer::Depth(DepthLayer {
            scores: vec![0.5; n],
            method: method.into(),
        })
    }

    #[test]
    fn from_curves_basic() {
        let data = FdaData::from_curves(FdMatrix::zeros(10, 50), unit_grid(50));

        assert_eq!(data.n_obs(), 10);
        assert_eq!(data.n_points(), 50);
        assert!(data.require_curves().is_ok());
        assert!(!data.has_layer(&LayerKey::Fpca));
    }

    #[test]
    fn add_and_retrieve_layers() {
        let mut data = FdaData::from_curves(FdMatrix::zeros(5, 20), unit_grid(20));
        data.set_layer(LayerKey::Depth, constant_depth(5, "fraiman_muniz"));

        assert!(data.has_layer(&LayerKey::Depth));
        assert!(!data.has_layer(&LayerKey::Fpca));
        assert!(data.depth().is_some());
        assert_eq!(data.depth().unwrap().scores.len(), 5);
        assert_eq!(data.layer_keys().len(), 1);
    }

    #[test]
    fn require_missing_layer_errors() {
        let data = FdaData::from_curves(FdMatrix::zeros(3, 10), vec![0.0; 10]);

        assert!(data.require_layer(&LayerKey::Fpca).is_err());
    }

    #[test]
    fn scalar_vars() {
        let mut data = FdaData::empty();
        data.add_scalar("height", vec![170.0, 180.0, 165.0]);

        assert_eq!(data.get_scalar("height").unwrap(), &[170.0, 180.0, 165.0]);
        assert!(data.get_scalar("weight").is_none());
        assert_eq!(data.n_obs(), 3);
    }

    #[test]
    fn multiple_layers_compose() {
        let mut data = FdaData::from_curves(FdMatrix::zeros(10, 30), vec![0.0; 30]);

        let outliers = Layer::Outliers(OutlierLayer {
            flags: vec![false; 10],
            threshold: 0.1,
            method: "lrt".into(),
            mei: None,
            mbd: None,
            magnitude: None,
            shape: None,
        });
        let distances = Layer::Distances(DistancesLayer {
            dist_mat: FdMatrix::zeros(10, 10),
            method: "elastic".into(),
        });

        data.set_layer(LayerKey::Depth, constant_depth(10, "fm"));
        data.set_layer(LayerKey::Outliers, outliers);
        data.set_layer(LayerKey::Distances, distances);

        assert_eq!(data.layer_keys().len(), 3);
        assert!(data.depth().is_some());
        assert!(data.outliers().is_some());
        assert!(data.distances().is_some());
    }
}