fdars_core/spm/iterative.rs
1//! Iterative Phase I chart construction for SPM.
2//!
3//! Repeatedly builds SPM charts and removes out-of-control observations
4//! until convergence, producing a cleaner in-control reference dataset.
5//! This addresses the common problem of Phase I data contamination
6//! where outliers distort the FPCA and control limits.
7//!
8//! # Convergence properties
9//!
10//! The iterative Phase I procedure converges when no new outliers are removed
11//! between iterations. Convergence is guaranteed in at most n iterations (each
12//! iteration removes at least one outlier or terminates). In practice, 3--5
13//! iterations suffice for typical contamination levels (5--15% outliers).
14//! Non-convergence (oscillation) can occur when the contamination fraction
15//! is near the breakdown point of the underlying T-squared / SPE statistics.
16//!
17//! # Breakdown point
18//!
19//! The procedure's breakdown point depends on the initial T-squared threshold.
20//! With alpha = 0.05 and chi-squared limits, the expected breakdown is roughly
21//! 50% for the T-squared statistic (Rousseeuw & Leroy, 1987, section 1.3,
22//! pp. 10--12). For contamination above the breakdown point, consider robust
23//! initialization via projection pursuit or minimum covariance determinant
24//! (MCD) before applying the iterative procedure.
25//!
26//! # References
27//!
28//! - Sullivan, J.H. & Woodall, W.H. (1996). A comparison of multivariate
29//! control charts for individual observations. *Journal of Quality
30//! Technology*, 28(4), 398--408, section 3 (iterative Phase I procedure).
31//! - Chenouri, S., Steiner, S.H. & Variyath, A.M. (2009). A multivariate
32//! robust control chart for individual observations. *Journal of Quality
33//! Technology*, 41(3), 259--271, section 2 (robust alternatives).
34//! - Rousseeuw, P.J. & Leroy, A.M. (1987). *Robust Regression and Outlier
35//! Detection*. Wiley, section 1.3, pp. 10--12 (breakdown point),
36//! section 4.1, pp. 116--119 (iterative reweighting).
37
38use crate::error::FdarError;
39use crate::matrix::FdMatrix;
40
41use super::phase::{spm_monitor, spm_phase1, SpmChart, SpmConfig};
42
43/// Configuration for iterative Phase I chart construction.
44///
45/// The iterative approach assumes outliers are a minority of the data.
46/// When more than `max_removal_fraction` of the original data would be
47/// removed, the procedure stops early, preserving the remaining data
48/// for analysis.
49#[derive(Debug, Clone, PartialEq)]
50pub struct IterativePhase1Config {
51 /// Base SPM configuration.
52 pub spm: SpmConfig,
53 /// Maximum number of iterations (default 10).
54 pub max_iterations: usize,
55 /// Remove observations exceeding the T-squared limit (default true).
56 pub remove_t2_outliers: bool,
57 /// Remove observations exceeding the SPE limit (default true).
58 pub remove_spe_outliers: bool,
59 /// Maximum cumulative fraction of original data that can be removed (default 0.3).
60 /// Iteration stops if the next removal batch would push the total removed
61 /// count above this fraction of the original dataset size.
62 ///
63 /// This acts as a safeguard against breakdown: if more than 30% of the data
64 /// is flagged, the in-control model is likely misspecified rather than there
65 /// being isolated outliers (Rousseeuw & Leroy, 1987, section 4.1, pp. 116--119).
66 ///
67 /// If removal rates don't decrease across iterations (e.g., oscillating
68 /// around 0.3--0.5), the process likely has sustained non-stationarity
69 /// rather than isolated outliers. Consider increasing `alpha` or
70 /// investigating the data for structural changes.
71 pub max_removal_fraction: f64,
72}
73
74impl Default for IterativePhase1Config {
75 fn default() -> Self {
76 Self {
77 spm: SpmConfig::default(),
78 max_iterations: 10,
79 remove_t2_outliers: true,
80 remove_spe_outliers: true,
81 max_removal_fraction: 0.3,
82 }
83 }
84}
85
86/// Result of iterative Phase I chart construction.
87#[derive(Debug, Clone, PartialEq)]
88#[non_exhaustive]
89pub struct IterativePhase1Result {
90 /// Final SPM chart after outlier removal.
91 pub chart: SpmChart,
92 /// Number of iterations performed.
93 pub n_iterations: usize,
94 /// Indices of removed observations (relative to original data).
95 pub removed_indices: Vec<usize>,
96 /// Number of observations remaining.
97 pub n_remaining: usize,
98 /// History of observations removed per iteration.
99 pub removal_history: Vec<Vec<usize>>,
100 /// Fraction of observations removed per iteration (convergence diagnostic).
101 /// A decreasing sequence indicates convergence. Rates > 0.5 at any
102 /// iteration suggest the control limits may be too tight or the process
103 /// is genuinely unstable.
104 pub removal_rates: Vec<f64>,
105}
106
107/// Iteratively build a Phase I SPM chart by removing out-of-control observations.
108///
109/// Standard Phase I (`spm_phase1`) builds the chart once. However, if the training
110/// data contains outliers, the chart may be contaminated. This function repeatedly:
111///
112/// 1. Builds a chart from the current clean data
113/// 2. Monitors all current data against the chart
114/// 3. Removes observations flagged as out-of-control
115/// 4. Repeats until no more observations are removed or the maximum number of
116/// iterations is reached
117///
118/// # Arguments
119/// * `data` - In-control functional data (n x m)
120/// * `argvals` - Grid points (length m)
121/// * `config` - Iterative Phase I configuration
122///
123/// # Example
124///
125/// ```
126/// use fdars_core::matrix::FdMatrix;
127/// use fdars_core::spm::iterative::{spm_phase1_iterative, IterativePhase1Config};
128/// use fdars_core::spm::phase::SpmConfig;
129/// let data = FdMatrix::from_column_major(
130/// (0..200).map(|i| (i as f64 * 0.1).sin()).collect(), 20, 10
131/// ).unwrap();
132/// let argvals: Vec<f64> = (0..10).map(|i| i as f64 / 9.0).collect();
133/// let config = IterativePhase1Config {
134/// spm: SpmConfig { ncomp: 2, ..SpmConfig::default() },
135/// ..IterativePhase1Config::default()
136/// };
137/// let result = spm_phase1_iterative(&data, &argvals, &config).unwrap();
138/// assert!(result.n_iterations <= config.max_iterations);
139/// ```
140///
141/// # Errors
142///
143/// Returns `FdarError::InvalidParameter` if `max_iterations < 1` or
144/// `max_removal_fraction` is not in (0, 1]. Dimension errors are propagated
145/// from `spm_phase1`.
146#[must_use = "expensive computation whose result should not be discarded"]
147pub fn spm_phase1_iterative(
148 data: &FdMatrix,
149 argvals: &[f64],
150 config: &IterativePhase1Config,
151) -> Result<IterativePhase1Result, FdarError> {
152 // Validate iterative-specific parameters.
153 // alpha must be in (0, 1). Smaller alpha values (e.g., 0.01) produce wider
154 // control limits and remove fewer observations per iteration, yielding a more
155 // conservative procedure. Larger alpha (e.g., 0.10) is more aggressive and
156 // converges faster but risks removing in-control observations (masking).
157 // The default alpha = 0.05 balances sensitivity and specificity for typical
158 // contamination levels (5--15%).
159 if config.spm.alpha <= 0.0 || config.spm.alpha >= 1.0 {
160 return Err(FdarError::InvalidParameter {
161 parameter: "alpha",
162 message: format!("alpha must be in (0, 1), got {}", config.spm.alpha),
163 });
164 }
165 if config.max_iterations < 1 {
166 return Err(FdarError::InvalidParameter {
167 parameter: "max_iterations",
168 message: format!(
169 "max_iterations must be at least 1, got {}",
170 config.max_iterations
171 ),
172 });
173 }
174 if config.max_removal_fraction <= 0.0 || config.max_removal_fraction > 1.0 {
175 return Err(FdarError::InvalidParameter {
176 parameter: "max_removal_fraction",
177 message: format!(
178 "max_removal_fraction must be in (0, 1], got {}",
179 config.max_removal_fraction
180 ),
181 });
182 }
183
184 let n_original = data.nrows();
185 let mut remaining_indices: Vec<usize> = (0..n_original).collect();
186 let mut all_removed: Vec<usize> = vec![];
187 let mut removal_history: Vec<Vec<usize>> = vec![];
188 let mut removal_rates: Vec<f64> = vec![];
189
190 let mut chart = None;
191
192 for _ in 0..config.max_iterations {
193 // Build chart from current data
194 let current_data = crate::cv::subset_rows(data, &remaining_indices);
195 let current_chart = spm_phase1(¤t_data, argvals, &config.spm)?;
196
197 // Monitor the same data against the chart
198 let monitor = spm_monitor(¤t_chart, ¤t_data, argvals)?;
199
200 // Identify out-of-control observations
201 let n_current = remaining_indices.len();
202 let mut flagged_local: Vec<usize> = Vec::new();
203 for i in 0..n_current {
204 let is_flagged = (config.remove_t2_outliers && monitor.t2_alarm[i])
205 || (config.remove_spe_outliers && monitor.spe_alarm[i]);
206 if is_flagged {
207 flagged_local.push(i);
208 }
209 }
210
211 // Converged: no observations flagged
212 if flagged_local.is_empty() {
213 chart = Some(current_chart);
214 break;
215 }
216
217 // Check cumulative removal: total removed so far (including this batch)
218 // against the maximum allowed fraction of the ORIGINAL dataset.
219 // The 0.5 removal rate threshold is a practical heuristic: if more
220 // than half the remaining data is flagged in one iteration, the
221 // in-control model is likely misspecified rather than there being
222 // individual outliers. This aligns with the breakdown point of
223 // classical outlier detection methods (Rousseeuw & Leroy, 1987).
224 let total_removed = all_removed.len() + flagged_local.len();
225 if total_removed as f64 / n_original as f64 > config.max_removal_fraction {
226 chart = Some(current_chart);
227 break;
228 }
229
230 // Check if remaining after removal would be too few
231 let n_after = n_current - flagged_local.len();
232 if n_after < 4 {
233 chart = Some(current_chart);
234 break;
235 }
236
237 // Map flagged local indices back to original indices
238 let flagged_original: Vec<usize> = flagged_local
239 .iter()
240 .map(|&i| remaining_indices[i])
241 .collect();
242
243 // Update remaining_indices by removing flagged ones
244 let flagged_set: std::collections::HashSet<usize> = flagged_local.iter().copied().collect();
245 remaining_indices = remaining_indices
246 .iter()
247 .enumerate()
248 .filter(|(local_i, _)| !flagged_set.contains(local_i))
249 .map(|(_, &orig_i)| orig_i)
250 .collect();
251
252 let removal_rate = flagged_original.len() as f64 / n_current as f64;
253 removal_rates.push(removal_rate);
254 all_removed.extend_from_slice(&flagged_original);
255 removal_history.push(flagged_original);
256 }
257
258 // Build the final chart if we exhausted iterations without converging
259 let final_chart = match chart {
260 Some(c) => c,
261 None => {
262 let final_data = crate::cv::subset_rows(data, &remaining_indices);
263 spm_phase1(&final_data, argvals, &config.spm)?
264 }
265 };
266
267 Ok(IterativePhase1Result {
268 chart: final_chart,
269 n_iterations: removal_history.len(),
270 removed_indices: all_removed,
271 n_remaining: remaining_indices.len(),
272 removal_history,
273 removal_rates,
274 })
275}