use crate::error::FdarError;
use crate::matrix::FdMatrix;
use crate::regression::{fdata_to_pc_1d, FpcaResult};
use super::control::{spe_control_limit, t2_control_limit, ControlLimit};
use super::mfpca::{mfpca, MfpcaConfig, MfpcaResult};
use super::stats::{hotelling_t2, spe_multivariate, spe_univariate};
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SpmConfig {
pub ncomp: usize,
pub alpha: f64,
pub tuning_fraction: f64,
pub seed: u64,
}
impl Default for SpmConfig {
fn default() -> Self {
Self {
ncomp: 5,
alpha: 0.05,
tuning_fraction: 0.5,
seed: 42,
}
}
}
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct SpmChart {
pub fpca: FpcaResult,
pub eigenvalues: Vec<f64>,
pub t2_phase1: Vec<f64>,
pub spe_phase1: Vec<f64>,
pub t2_limit: ControlLimit,
pub spe_limit: ControlLimit,
pub config: SpmConfig,
pub sample_size_adequate: bool,
}
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct MfSpmChart {
pub mfpca: MfpcaResult,
pub t2_phase1: Vec<f64>,
pub spe_phase1: Vec<f64>,
pub t2_limit: ControlLimit,
pub spe_limit: ControlLimit,
pub config: SpmConfig,
}
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct SpmMonitorResult {
pub t2: Vec<f64>,
pub spe: Vec<f64>,
pub t2_alarm: Vec<bool>,
pub spe_alarm: Vec<bool>,
pub scores: FdMatrix,
}
pub(super) fn split_indices(n: usize, tuning_fraction: f64, seed: u64) -> (Vec<usize>, Vec<usize>) {
let n_tune = ((n as f64 * tuning_fraction).round() as usize)
.max(2)
.min(n - 1);
let mut indices: Vec<usize> = (0..n).collect();
let mut rng_state: u64 = seed;
for i in (1..n).rev() {
let j = pcg_next(&mut rng_state) as usize % (i + 1);
indices.swap(i, j);
}
let tune_indices: Vec<usize> = indices[..n_tune].to_vec();
let cal_indices: Vec<usize> = indices[n_tune..].to_vec();
(tune_indices, cal_indices)
}
fn pcg_next(state: &mut u64) -> u32 {
let old = *state;
*state = old.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
let xorshifted = (((old >> 18) ^ old) >> 27) as u32;
let rot = (old >> 59) as u32;
xorshifted.rotate_right(rot)
}
pub(super) fn centered_reconstruct(fpca: &FpcaResult, scores: &FdMatrix, ncomp: usize) -> FdMatrix {
let n = scores.nrows();
let m = fpca.mean.len();
let ncomp = ncomp.min(fpca.rotation.ncols()).min(scores.ncols());
let mut recon = FdMatrix::zeros(n, m);
for i in 0..n {
for j in 0..m {
let mut val = 0.0;
for k in 0..ncomp {
val += scores[(i, k)] * fpca.rotation[(j, k)];
}
recon[(i, j)] = val;
}
}
recon
}
pub(super) fn center_data(data: &FdMatrix, mean: &[f64]) -> FdMatrix {
let (n, m) = data.shape();
let mut centered = FdMatrix::zeros(n, m);
for i in 0..n {
for j in 0..m {
centered[(i, j)] = data[(i, j)] - mean[j];
}
}
centered
}
#[must_use = "expensive computation whose result should not be discarded"]
pub fn spm_phase1(
data: &FdMatrix,
argvals: &[f64],
config: &SpmConfig,
) -> Result<SpmChart, FdarError> {
let (n, m) = data.shape();
if n < 4 {
return Err(FdarError::InvalidDimension {
parameter: "data",
expected: "at least 4 observations for tuning/calibration split".to_string(),
actual: format!("{n} observations"),
});
}
let sample_size_adequate = n >= 10 * config.ncomp;
if argvals.len() != m {
return Err(FdarError::InvalidDimension {
parameter: "argvals",
expected: format!("{m}"),
actual: format!("{}", argvals.len()),
});
}
let (tune_idx, cal_idx) = split_indices(n, config.tuning_fraction, config.seed);
let tune_data = crate::cv::subset_rows(data, &tune_idx);
let cal_data = crate::cv::subset_rows(data, &cal_idx);
let n_tune = tune_data.nrows();
if n_tune < 3 {
return Err(FdarError::InvalidDimension {
parameter: "data",
expected: "tuning set with at least 3 observations".to_string(),
actual: format!(
"{n_tune} observations in tuning set (increase data size or tuning_fraction)"
),
});
}
let ncomp = config.ncomp.min(n_tune - 1).min(m);
let fpca = fdata_to_pc_1d(&tune_data, ncomp, argvals)?;
let actual_ncomp = fpca.scores.ncols();
let eigenvalues: Vec<f64> = fpca
.singular_values
.iter()
.take(actual_ncomp)
.map(|&sv| sv * sv / (n_tune as f64 - 1.0))
.collect();
let cal_scores = fpca.project(&cal_data)?;
let t2_phase1 = hotelling_t2(&cal_scores, &eigenvalues)?;
let cal_centered = center_data(&cal_data, &fpca.mean);
let cal_recon_centered = centered_reconstruct(&fpca, &cal_scores, actual_ncomp);
let spe_phase1 = spe_univariate(&cal_centered, &cal_recon_centered, argvals)?;
let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
let spe_limit = spe_control_limit(&spe_phase1, config.alpha)?;
Ok(SpmChart {
fpca,
eigenvalues,
t2_phase1,
spe_phase1,
t2_limit,
spe_limit,
config: config.clone(),
sample_size_adequate,
})
}
#[must_use = "monitoring result should not be discarded"]
pub fn spm_monitor(
chart: &SpmChart,
new_data: &FdMatrix,
argvals: &[f64],
) -> Result<SpmMonitorResult, FdarError> {
let m = chart.fpca.mean.len();
if new_data.ncols() != m {
return Err(FdarError::InvalidDimension {
parameter: "new_data",
expected: format!("{m} columns"),
actual: format!("{} columns", new_data.ncols()),
});
}
let ncomp = chart.eigenvalues.len();
let scores = chart.fpca.project(new_data)?;
let t2 = hotelling_t2(&scores, &chart.eigenvalues)?;
let centered = center_data(new_data, &chart.fpca.mean);
let recon_centered = centered_reconstruct(&chart.fpca, &scores, ncomp);
let spe = spe_univariate(¢ered, &recon_centered, argvals)?;
let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
let spe_alarm: Vec<bool> = spe.iter().map(|&v| v > chart.spe_limit.ucl).collect();
Ok(SpmMonitorResult {
t2,
spe,
t2_alarm,
spe_alarm,
scores,
})
}
#[must_use = "expensive computation whose result should not be discarded"]
pub fn mf_spm_phase1(
variables: &[&FdMatrix],
argvals_list: &[&[f64]],
config: &SpmConfig,
) -> Result<MfSpmChart, FdarError> {
if variables.is_empty() {
return Err(FdarError::InvalidDimension {
parameter: "variables",
expected: "at least 1 variable".to_string(),
actual: "0 variables".to_string(),
});
}
if variables.len() != argvals_list.len() {
return Err(FdarError::InvalidDimension {
parameter: "argvals_list",
expected: format!("{} (matching variables)", variables.len()),
actual: format!("{}", argvals_list.len()),
});
}
let n = variables[0].nrows();
if n < 4 {
return Err(FdarError::InvalidDimension {
parameter: "variables",
expected: "at least 4 observations".to_string(),
actual: format!("{n} observations"),
});
}
for (p, (var, argvals)) in variables.iter().zip(argvals_list.iter()).enumerate() {
if var.ncols() != argvals.len() {
return Err(FdarError::InvalidDimension {
parameter: "argvals_list",
expected: format!("{} for variable {p}", var.ncols()),
actual: format!("{}", argvals.len()),
});
}
}
let (tune_idx, cal_idx) = split_indices(n, config.tuning_fraction, config.seed);
let tune_vars: Vec<FdMatrix> = variables
.iter()
.map(|v| crate::cv::subset_rows(v, &tune_idx))
.collect();
let cal_vars: Vec<FdMatrix> = variables
.iter()
.map(|v| crate::cv::subset_rows(v, &cal_idx))
.collect();
let tune_refs: Vec<&FdMatrix> = tune_vars.iter().collect();
let mfpca_config = MfpcaConfig {
ncomp: config.ncomp,
weighted: true,
};
let mfpca_result = mfpca(&tune_refs, &mfpca_config)?;
let actual_ncomp = mfpca_result.eigenvalues.len();
let cal_refs: Vec<&FdMatrix> = cal_vars.iter().collect();
let cal_scores = mfpca_result.project(&cal_refs)?;
let t2_phase1 = hotelling_t2(&cal_scores, &mfpca_result.eigenvalues)?;
let cal_recon = mfpca_result.reconstruct(&cal_scores, actual_ncomp)?;
let n_cal = cal_vars[0].nrows();
let mut std_vars: Vec<FdMatrix> = Vec::with_capacity(variables.len());
let mut std_recon: Vec<FdMatrix> = Vec::with_capacity(variables.len());
for (p, cal_var) in cal_vars.iter().enumerate() {
let m_p = cal_var.ncols();
let scale = if mfpca_result.scales[p] > 1e-15 {
mfpca_result.scales[p]
} else {
1.0
};
let mut std_mat = FdMatrix::zeros(n_cal, m_p);
let mut recon_mat = FdMatrix::zeros(n_cal, m_p);
for i in 0..n_cal {
for j in 0..m_p {
std_mat[(i, j)] = (cal_var[(i, j)] - mfpca_result.means[p][j]) / scale;
recon_mat[(i, j)] = (cal_recon[p][(i, j)] - mfpca_result.means[p][j]) / scale;
}
}
std_vars.push(std_mat);
std_recon.push(recon_mat);
}
let std_refs: Vec<&FdMatrix> = std_vars.iter().collect();
let recon_refs: Vec<&FdMatrix> = std_recon.iter().collect();
let spe_phase1 = spe_multivariate(&std_refs, &recon_refs, argvals_list)?;
let t2_limit = t2_control_limit(actual_ncomp, config.alpha)?;
let spe_limit = spe_control_limit(&spe_phase1, config.alpha)?;
Ok(MfSpmChart {
mfpca: mfpca_result,
t2_phase1,
spe_phase1,
t2_limit,
spe_limit,
config: config.clone(),
})
}
#[must_use = "monitoring result should not be discarded"]
pub fn mf_spm_monitor(
chart: &MfSpmChart,
new_variables: &[&FdMatrix],
argvals_list: &[&[f64]],
) -> Result<SpmMonitorResult, FdarError> {
let n_vars = chart.mfpca.means.len();
if new_variables.len() != n_vars {
return Err(FdarError::InvalidDimension {
parameter: "new_variables",
expected: format!("{n_vars} variables"),
actual: format!("{} variables", new_variables.len()),
});
}
let actual_ncomp = chart.mfpca.eigenvalues.len();
let scores = chart.mfpca.project(new_variables)?;
let t2 = hotelling_t2(&scores, &chart.mfpca.eigenvalues)?;
let recon = chart.mfpca.reconstruct(&scores, actual_ncomp)?;
let n_new = new_variables[0].nrows();
let mut std_vars: Vec<FdMatrix> = Vec::with_capacity(n_vars);
let mut std_recon: Vec<FdMatrix> = Vec::with_capacity(n_vars);
for (p, new_var) in new_variables.iter().enumerate() {
let m_p = new_var.ncols();
let scale = if chart.mfpca.scales[p] > 1e-15 {
chart.mfpca.scales[p]
} else {
1.0
};
let mut std_mat = FdMatrix::zeros(n_new, m_p);
let mut recon_mat = FdMatrix::zeros(n_new, m_p);
for i in 0..n_new {
for j in 0..m_p {
std_mat[(i, j)] = (new_var[(i, j)] - chart.mfpca.means[p][j]) / scale;
recon_mat[(i, j)] = (recon[p][(i, j)] - chart.mfpca.means[p][j]) / scale;
}
}
std_vars.push(std_mat);
std_recon.push(recon_mat);
}
let std_refs: Vec<&FdMatrix> = std_vars.iter().collect();
let recon_refs: Vec<&FdMatrix> = std_recon.iter().collect();
let spe = spe_multivariate(&std_refs, &recon_refs, argvals_list)?;
let t2_alarm: Vec<bool> = t2.iter().map(|&v| v > chart.t2_limit.ucl).collect();
let spe_alarm: Vec<bool> = spe.iter().map(|&v| v > chart.spe_limit.ucl).collect();
Ok(SpmMonitorResult {
t2,
spe,
t2_alarm,
spe_alarm,
scores,
})
}
#[cfg(all(test, feature = "serde"))]
mod tests {
use super::*;
use crate::simulation::{sim_fundata, EFunType, EValType};
#[test]
fn spm_chart_roundtrip_serde() {
let t: Vec<f64> = (0..50).map(|i| i as f64 / 49.0).collect();
let data = sim_fundata(
40,
&t,
5,
EFunType::Fourier,
EValType::Exponential,
Some(42),
);
let config = SpmConfig {
ncomp: 3,
alpha: 0.05,
..Default::default()
};
let chart = spm_phase1(&data, &t, &config).unwrap();
let json = serde_json::to_string(&chart).unwrap();
let restored: SpmChart = serde_json::from_str(&json).unwrap();
for (a, b) in chart.t2_phase1.iter().zip(&restored.t2_phase1) {
assert!((a - b).abs() < 1e-12, "t2_phase1 mismatch: {a} vs {b}");
}
assert_eq!(chart.t2_limit.ucl, restored.t2_limit.ucl);
assert_eq!(chart.spe_limit.ucl, restored.spe_limit.ucl);
assert_eq!(chart.config, restored.config);
assert_eq!(chart.eigenvalues.len(), restored.eigenvalues.len());
let new_data = sim_fundata(
10,
&t,
5,
EFunType::Fourier,
EValType::Exponential,
Some(99),
);
let r1 = spm_monitor(&chart, &new_data, &t).unwrap();
let r2 = spm_monitor(&restored, &new_data, &t).unwrap();
for (a, b) in r1.t2.iter().zip(&r2.t2) {
assert!((a - b).abs() < 1e-10, "t2 mismatch: {a} vs {b}");
}
assert_eq!(r1.t2_alarm, r2.t2_alarm);
assert_eq!(r1.spe_alarm, r2.spe_alarm);
}
}