Skip to main content

fdars_core/spm/
iterative.rs

1//! Iterative Phase I chart construction for SPM.
2//!
3//! Repeatedly builds SPM charts and removes out-of-control observations
4//! until convergence, producing a cleaner in-control reference dataset.
5//! This addresses the common problem of Phase I data contamination
6//! where outliers distort the FPCA and control limits.
7//!
8//! # Convergence properties
9//!
10//! The iterative Phase I procedure converges when no new outliers are removed
11//! between iterations. Convergence is guaranteed in at most n iterations (each
12//! iteration removes at least one outlier or terminates). In practice, 3--5
13//! iterations suffice for typical contamination levels (5--15% outliers).
14//! Non-convergence (oscillation) can occur when the contamination fraction
15//! is near the breakdown point of the underlying T-squared / SPE statistics.
16//!
17//! # Breakdown point
18//!
19//! The procedure's breakdown point depends on the initial T-squared threshold.
20//! With alpha = 0.05 and chi-squared limits, the expected breakdown is roughly
21//! 50% for the T-squared statistic (Rousseeuw & Leroy, 1987, section 1.3,
22//! pp. 10--12). For contamination above the breakdown point, consider robust
23//! initialization via projection pursuit or minimum covariance determinant
24//! (MCD) before applying the iterative procedure.
25//!
26//! # References
27//!
28//! - Sullivan, J.H. & Woodall, W.H. (1996). A comparison of multivariate
29//!   control charts for individual observations. *Journal of Quality
30//!   Technology*, 28(4), 398--408, section 3 (iterative Phase I procedure).
31//! - Chenouri, S., Steiner, S.H. & Variyath, A.M. (2009). A multivariate
32//!   robust control chart for individual observations. *Journal of Quality
33//!   Technology*, 41(3), 259--271, section 2 (robust alternatives).
34//! - Rousseeuw, P.J. & Leroy, A.M. (1987). *Robust Regression and Outlier
35//!   Detection*. Wiley, section 1.3, pp. 10--12 (breakdown point),
36//!   section 4.1, pp. 116--119 (iterative reweighting).
37
38use crate::error::FdarError;
39use crate::matrix::FdMatrix;
40
41use super::phase::{spm_monitor, spm_phase1, SpmChart, SpmConfig};
42
43/// Configuration for iterative Phase I chart construction.
44///
45/// The iterative approach assumes outliers are a minority of the data.
46/// When more than `max_removal_fraction` of the original data would be
47/// removed, the procedure stops early, preserving the remaining data
48/// for analysis.
49#[derive(Debug, Clone, PartialEq)]
50pub struct IterativePhase1Config {
51    /// Base SPM configuration.
52    pub spm: SpmConfig,
53    /// Maximum number of iterations (default 10).
54    pub max_iterations: usize,
55    /// Remove observations exceeding the T-squared limit (default true).
56    pub remove_t2_outliers: bool,
57    /// Remove observations exceeding the SPE limit (default true).
58    pub remove_spe_outliers: bool,
59    /// Maximum cumulative fraction of original data that can be removed (default 0.3).
60    /// Iteration stops if the next removal batch would push the total removed
61    /// count above this fraction of the original dataset size.
62    ///
63    /// This acts as a safeguard against breakdown: if more than 30% of the data
64    /// is flagged, the in-control model is likely misspecified rather than there
65    /// being isolated outliers (Rousseeuw & Leroy, 1987, section 4.1, pp. 116--119).
66    ///
67    /// If removal rates don't decrease across iterations (e.g., oscillating
68    /// around 0.3--0.5), the process likely has sustained non-stationarity
69    /// rather than isolated outliers. Consider increasing `alpha` or
70    /// investigating the data for structural changes.
71    pub max_removal_fraction: f64,
72}
73
74impl Default for IterativePhase1Config {
75    fn default() -> Self {
76        Self {
77            spm: SpmConfig::default(),
78            max_iterations: 10,
79            remove_t2_outliers: true,
80            remove_spe_outliers: true,
81            max_removal_fraction: 0.3,
82        }
83    }
84}
85
86/// Result of iterative Phase I chart construction.
87#[derive(Debug, Clone, PartialEq)]
88#[non_exhaustive]
89pub struct IterativePhase1Result {
90    /// Final SPM chart after outlier removal.
91    pub chart: SpmChart,
92    /// Number of iterations performed.
93    pub n_iterations: usize,
94    /// Indices of removed observations (relative to original data).
95    pub removed_indices: Vec<usize>,
96    /// Number of observations remaining.
97    pub n_remaining: usize,
98    /// History of observations removed per iteration.
99    pub removal_history: Vec<Vec<usize>>,
100    /// Fraction of observations removed per iteration (convergence diagnostic).
101    /// A decreasing sequence indicates convergence. Rates > 0.5 at any
102    /// iteration suggest the control limits may be too tight or the process
103    /// is genuinely unstable.
104    pub removal_rates: Vec<f64>,
105}
106
107/// Iteratively build a Phase I SPM chart by removing out-of-control observations.
108///
109/// Standard Phase I (`spm_phase1`) builds the chart once. However, if the training
110/// data contains outliers, the chart may be contaminated. This function repeatedly:
111///
112/// 1. Builds a chart from the current clean data
113/// 2. Monitors all current data against the chart
114/// 3. Removes observations flagged as out-of-control
115/// 4. Repeats until no more observations are removed or the maximum number of
116///    iterations is reached
117///
118/// # Arguments
119/// * `data` - In-control functional data (n x m)
120/// * `argvals` - Grid points (length m)
121/// * `config` - Iterative Phase I configuration
122///
123/// # Example
124///
125/// ```
126/// use fdars_core::matrix::FdMatrix;
127/// use fdars_core::spm::iterative::{spm_phase1_iterative, IterativePhase1Config};
128/// use fdars_core::spm::phase::SpmConfig;
129/// let data = FdMatrix::from_column_major(
130///     (0..200).map(|i| (i as f64 * 0.1).sin()).collect(), 20, 10
131/// ).unwrap();
132/// let argvals: Vec<f64> = (0..10).map(|i| i as f64 / 9.0).collect();
133/// let config = IterativePhase1Config {
134///     spm: SpmConfig { ncomp: 2, ..SpmConfig::default() },
135///     ..IterativePhase1Config::default()
136/// };
137/// let result = spm_phase1_iterative(&data, &argvals, &config).unwrap();
138/// assert!(result.n_iterations <= config.max_iterations);
139/// ```
140///
141/// # Errors
142///
143/// Returns `FdarError::InvalidParameter` if `max_iterations < 1` or
144/// `max_removal_fraction` is not in (0, 1]. Dimension errors are propagated
145/// from `spm_phase1`.
146#[must_use = "expensive computation whose result should not be discarded"]
147pub fn spm_phase1_iterative(
148    data: &FdMatrix,
149    argvals: &[f64],
150    config: &IterativePhase1Config,
151) -> Result<IterativePhase1Result, FdarError> {
152    // Validate iterative-specific parameters.
153    // alpha must be in (0, 1). Smaller alpha values (e.g., 0.01) produce wider
154    // control limits and remove fewer observations per iteration, yielding a more
155    // conservative procedure. Larger alpha (e.g., 0.10) is more aggressive and
156    // converges faster but risks removing in-control observations (masking).
157    // The default alpha = 0.05 balances sensitivity and specificity for typical
158    // contamination levels (5--15%).
159    if config.spm.alpha <= 0.0 || config.spm.alpha >= 1.0 {
160        return Err(FdarError::InvalidParameter {
161            parameter: "alpha",
162            message: format!("alpha must be in (0, 1), got {}", config.spm.alpha),
163        });
164    }
165    if config.max_iterations < 1 {
166        return Err(FdarError::InvalidParameter {
167            parameter: "max_iterations",
168            message: format!(
169                "max_iterations must be at least 1, got {}",
170                config.max_iterations
171            ),
172        });
173    }
174    if config.max_removal_fraction <= 0.0 || config.max_removal_fraction > 1.0 {
175        return Err(FdarError::InvalidParameter {
176            parameter: "max_removal_fraction",
177            message: format!(
178                "max_removal_fraction must be in (0, 1], got {}",
179                config.max_removal_fraction
180            ),
181        });
182    }
183
184    let n_original = data.nrows();
185    let mut remaining_indices: Vec<usize> = (0..n_original).collect();
186    let mut all_removed: Vec<usize> = vec![];
187    let mut removal_history: Vec<Vec<usize>> = vec![];
188    let mut removal_rates: Vec<f64> = vec![];
189
190    let mut chart = None;
191
192    for _ in 0..config.max_iterations {
193        // Build chart from current data
194        let current_data = crate::cv::subset_rows(data, &remaining_indices);
195        let current_chart = spm_phase1(&current_data, argvals, &config.spm)?;
196
197        // Monitor the same data against the chart
198        let monitor = spm_monitor(&current_chart, &current_data, argvals)?;
199
200        // Identify out-of-control observations
201        let n_current = remaining_indices.len();
202        let mut flagged_local: Vec<usize> = Vec::new();
203        for i in 0..n_current {
204            let is_flagged = (config.remove_t2_outliers && monitor.t2_alarm[i])
205                || (config.remove_spe_outliers && monitor.spe_alarm[i]);
206            if is_flagged {
207                flagged_local.push(i);
208            }
209        }
210
211        // Converged: no observations flagged
212        if flagged_local.is_empty() {
213            chart = Some(current_chart);
214            break;
215        }
216
217        // Check cumulative removal: total removed so far (including this batch)
218        // against the maximum allowed fraction of the ORIGINAL dataset.
219        // The 0.5 removal rate threshold is a practical heuristic: if more
220        // than half the remaining data is flagged in one iteration, the
221        // in-control model is likely misspecified rather than there being
222        // individual outliers. This aligns with the breakdown point of
223        // classical outlier detection methods (Rousseeuw & Leroy, 1987).
224        let total_removed = all_removed.len() + flagged_local.len();
225        if total_removed as f64 / n_original as f64 > config.max_removal_fraction {
226            chart = Some(current_chart);
227            break;
228        }
229
230        // Check if remaining after removal would be too few
231        let n_after = n_current - flagged_local.len();
232        if n_after < 4 {
233            chart = Some(current_chart);
234            break;
235        }
236
237        // Map flagged local indices back to original indices
238        let flagged_original: Vec<usize> = flagged_local
239            .iter()
240            .map(|&i| remaining_indices[i])
241            .collect();
242
243        // Update remaining_indices by removing flagged ones
244        let flagged_set: std::collections::HashSet<usize> = flagged_local.iter().copied().collect();
245        remaining_indices = remaining_indices
246            .iter()
247            .enumerate()
248            .filter(|(local_i, _)| !flagged_set.contains(local_i))
249            .map(|(_, &orig_i)| orig_i)
250            .collect();
251
252        let removal_rate = flagged_original.len() as f64 / n_current as f64;
253        removal_rates.push(removal_rate);
254        all_removed.extend_from_slice(&flagged_original);
255        removal_history.push(flagged_original);
256    }
257
258    // Build the final chart if we exhausted iterations without converging
259    let final_chart = match chart {
260        Some(c) => c,
261        None => {
262            let final_data = crate::cv::subset_rows(data, &remaining_indices);
263            spm_phase1(&final_data, argvals, &config.spm)?
264        }
265    };
266
267    Ok(IterativePhase1Result {
268        chart: final_chart,
269        n_iterations: removal_history.len(),
270        removed_indices: all_removed,
271        n_remaining: remaining_indices.len(),
272        removal_history,
273        removal_rates,
274    })
275}