// fdars_core/spm/partial.rs

//! Partial-domain monitoring for functional data.
//!
//! Monitors processes where only a partial observation of the functional
//! domain is available (e.g., real-time monitoring of an ongoing process).
//! Supports three strategies for handling the unobserved domain:
//! - Conditional expectation (BLUP)
//! - Partial projection (scaled inner products)
//! - Zero padding
//!
//! # Mathematical framework
//!
//! The BLUP (Best Linear Unbiased Predictor) conditional expectation formula is:
//!
//! xi_hat = Lambda · Phi_obs^T · (Phi_obs · Lambda · Phi_obs^T + sigma^2 I)^{-1} · y_obs
//!
//! where Lambda = diag(eigenvalues) is the ncomp x ncomp prior covariance of
//! the FPC scores, Phi_obs is the eigenfunction matrix restricted to observed
//! grid points (n_obs x ncomp), sigma^2 is estimated from the median SPE
//! (robust to outliers), and y_obs is the centered partial observation. Under
//! Gaussian assumptions, this is the BLUP (Yao et al., 2005, section 3,
//! pp. 580--583, Eq. 6).
//!
//! The domain fraction is computed as (argvals\[n_obs-1\] - argvals\[0\]) /
//! (argvals\[m-1\] - argvals\[0\]), representing the proportion of the domain
//! range covered by observed points. For a single observed point (range = 0),
//! the point-count fraction n_obs/m is used as a fallback, consistent with
//! the PACE framework where prediction from a single observation reduces to
//! the marginal BLUP.
//!
//! # Numerical stability
//!
//! The ncomp x ncomp system matrix Phi_obs · Lambda · Phi_obs^T + sigma^2 I
//! (equivalently, Lambda^{-1} + sigma^{-2} Phi_obs^T Phi_obs via the Woodbury
//! identity) is solved via Cholesky factorization. If the matrix is near-singular
//! (condition number proxy diag_max/diag_min > 10^12), progressive Tikhonov
//! regularization is applied: the diagonal loading is increased by factors of 10
//! until the factorization succeeds (up to 5 retries).
//!
//! # References
//!
//! - Yao, F., Muller, H.-G. & Wang, J.-L. (2005). Functional data analysis
//!   for sparse longitudinal data. *Journal of the American Statistical
//!   Association*, 100(470), 577--590, section 3, pp. 580--583 (PACE framework
//!   and BLUP derivation).

46use crate::error::FdarError;
47use crate::helpers::simpsons_weights;
48use crate::matrix::FdMatrix;
49
50use super::phase::SpmChart;
51use super::stats::hotelling_t2;
52
/// Strategy for handling unobserved domain.
///
/// # Choosing a strategy
///
/// - **ConditionalExpectation** (recommended): the most accurate option when
///   the FPCA model is well-specified; predicts scores from the partial
///   observation via the BLUP and degrades gracefully as the observed
///   fraction shrinks.
/// - **PartialProjection**: the cheapest option; rescales inner products over
///   the observed region by the domain fraction. Acceptable when the domain
///   fraction exceeds about 0.7.
/// - **ZeroPad**: the simplest baseline; substitutes the mean function over
///   the unobserved region, which shrinks scores toward zero when little of
///   the domain has been seen.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum DomainCompletion {
    /// Partial inner products scaled by domain fraction.
    PartialProjection,
    /// Best Linear Unbiased Predictor (Yao et al., 2005).
    ConditionalExpectation,
    /// Pad unobserved region with the mean function.
    ZeroPad,
}
74
75/// Configuration for partial-domain monitoring.
76///
77/// For domain fractions below 0.3, all strategies produce increasingly
78/// uncertain estimates. The conditional expectation (BLUP) degrades most
79/// gracefully due to its optimal shrinkage properties.
80#[derive(Debug, Clone, PartialEq)]
81pub struct PartialDomainConfig {
82 /// Number of principal components (default 5).
83 pub ncomp: usize,
84 /// Significance level (default 0.05).
85 pub alpha: f64,
86 /// Domain completion strategy (default: ConditionalExpectation).
87 pub completion: DomainCompletion,
88 /// Tikhonov regularization strength for ill-conditioned systems (default 1e-10).
89 ///
90 /// Applied as Tikhonov regularization (diagonal loading) when the M matrix
91 /// condition number proxy exceeds 1e12. Increase to 1e-6 if you observe
92 /// numerical warnings or NaN in scores.
93 pub regularization_eps: f64,
94}
95
96impl Default for PartialDomainConfig {
97 fn default() -> Self {
98 Self {
99 ncomp: 5,
100 alpha: 0.05,
101 completion: DomainCompletion::ConditionalExpectation,
102 regularization_eps: 1e-10,
103 }
104 }
105}
106
/// Result of partial-domain monitoring for a single observation.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct PartialMonitorResult {
    /// FPC scores estimated from the partial observation.
    pub scores: Vec<f64>,
    /// Hotelling T-squared statistic computed from `scores`.
    pub t2: f64,
    /// True when `t2` exceeds the Phase I upper control limit.
    pub t2_alarm: bool,
    /// Fraction of the domain covered by the observed points.
    pub domain_fraction: f64,
    /// Reconstructed full curve (present for ConditionalExpectation and
    /// ZeroPad; `None` for PartialProjection).
    pub completed_curve: Option<Vec<f64>>,
}
122
/// Monitor a single partially-observed curve.
///
/// # Arguments
/// * `chart` - Phase I SPM chart
/// * `partial_values` - Observed values (length m, but only first `n_observed` are used)
/// * `argvals` - Full grid points (length m)
/// * `n_observed` - Number of observed grid points (from the start of the domain);
///   values larger than m are clamped to m by the completion strategies
/// * `config` - Partial domain configuration
///
/// For domain fractions below 0.3, conditional expectation accuracy degrades
/// significantly. Consider collecting more of the domain or using a dedicated
/// early-detection method.
///
/// # Example
///
/// ```
/// use fdars_core::matrix::FdMatrix;
/// use fdars_core::spm::phase::{spm_phase1, SpmConfig};
/// use fdars_core::spm::partial::{spm_monitor_partial, PartialDomainConfig, DomainCompletion};
/// let data = FdMatrix::from_column_major(
///     (0..200).map(|i| (i as f64 * 0.1).sin()).collect(), 20, 10
/// ).unwrap();
/// let argvals: Vec<f64> = (0..10).map(|i| i as f64 / 9.0).collect();
/// let chart = spm_phase1(&data, &argvals, &SpmConfig { ncomp: 2, ..SpmConfig::default() }).unwrap();
/// let partial_values = vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0];
/// let config = PartialDomainConfig { ncomp: 2, completion: DomainCompletion::ZeroPad, ..PartialDomainConfig::default() };
/// let result = spm_monitor_partial(&chart, &partial_values, &argvals, 5, &config).unwrap();
/// assert!(result.domain_fraction > 0.0);
/// ```
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] if dimensions are inconsistent.
/// Returns [`FdarError::InvalidParameter`] if `n_observed` is 0, if `argvals`
/// is not sorted, or if the computed domain fraction is out of range.
#[must_use = "monitoring result should not be discarded"]
pub fn spm_monitor_partial(
    chart: &SpmChart,
    partial_values: &[f64],
    argvals: &[f64],
    n_observed: usize,
    config: &PartialDomainConfig,
) -> Result<PartialMonitorResult, FdarError> {
    // -- Validation ---------------------------------------------------------
    // The grid length m is taken from the Phase I mean function.
    let m = chart.fpca.mean.len();
    if argvals.len() != m {
        return Err(FdarError::InvalidDimension {
            parameter: "argvals",
            expected: format!("{m}"),
            actual: format!("{}", argvals.len()),
        });
    }
    // partial_values may be longer than n_observed; only the prefix is used.
    if partial_values.len() < n_observed {
        return Err(FdarError::InvalidDimension {
            parameter: "partial_values",
            expected: format!("at least {n_observed} values"),
            actual: format!("{} values", partial_values.len()),
        });
    }
    if n_observed == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "n_observed",
            message: "n_observed must be at least 1".to_string(),
        });
    }
    // Ensure argvals is sorted (strictly increasing endpoints).
    // NOTE(review): only the two endpoints are compared, so interior
    // disorder in argvals is not detected here — confirm callers guarantee
    // a monotone grid.
    if m > 1 && argvals[0] >= argvals[m - 1] {
        return Err(FdarError::InvalidParameter {
            parameter: "argvals",
            message: "argvals must be sorted (first element must be less than last)".to_string(),
        });
    }

    // Cap the requested component count at what Phase I actually retained.
    let ncomp = config.ncomp.min(chart.eigenvalues.len());
    // Domain fraction: ratio of observed domain range to full range.
    // For n_observed=1, the range is zero so we fall back to point-count fraction.
    let domain_fraction = if m <= 1 {
        1.0
    } else {
        let range_fraction =
            (argvals[n_observed.min(m) - 1] - argvals[0]) / (argvals[m - 1] - argvals[0]);
        if range_fraction > 0.0 {
            range_fraction
        } else {
            // Single observed point: use point-count fraction as fallback.
            // The point-count fraction n_obs/m is used when the range-based
            // fraction is zero (single observed point). This is consistent
            // with the PACE framework (Yao et al., 2005) where prediction
            // from a single observation reduces to the marginal BLUP.
            n_observed as f64 / m as f64
        }
    };
    if !(0.0..=1.0).contains(&domain_fraction) {
        return Err(FdarError::InvalidParameter {
            parameter: "domain_fraction",
            message: format!("computed domain_fraction must be in [0, 1], got {domain_fraction}"),
        });
    }

    // -- Score estimation via the configured completion strategy ------------
    let (scores, completed_curve) = match &config.completion {
        DomainCompletion::ConditionalExpectation => conditional_expectation(
            chart,
            partial_values,
            argvals,
            n_observed,
            ncomp,
            config.regularization_eps,
        )?,
        DomainCompletion::PartialProjection => {
            // PartialProjection yields scores only; no reconstructed curve.
            let scores = partial_projection(chart, partial_values, argvals, n_observed, ncomp)?;
            (scores, None)
        }
        DomainCompletion::ZeroPad => zero_pad(chart, partial_values, argvals, n_observed, ncomp)?,
    };

    // Compute T-squared from the estimated scores and compare against the
    // Phase I upper control limit.
    let eigenvalues = &chart.eigenvalues[..ncomp];
    let score_mat = FdMatrix::from_column_major(scores.clone(), 1, ncomp)?;
    let t2_vec = hotelling_t2(&score_mat, eigenvalues)?;
    let t2 = t2_vec[0];
    let t2_alarm = t2 > chart.t2_limit.ucl;

    Ok(PartialMonitorResult {
        scores,
        t2,
        t2_alarm,
        domain_fraction,
        completed_curve,
    })
}
252
253/// Monitor a batch of partially-observed curves.
254///
255/// # Arguments
256/// * `chart` - Phase I SPM chart
257/// * `partial_data` - Slice of (values, n_observed) pairs
258/// * `argvals` - Full grid points (length m)
259/// * `config` - Partial domain configuration
260///
261/// # Errors
262///
263/// Returns errors from individual monitoring calls.
264#[must_use = "monitoring results should not be discarded"]
265pub fn spm_monitor_partial_batch(
266 chart: &SpmChart,
267 partial_data: &[(&[f64], usize)],
268 argvals: &[f64],
269 config: &PartialDomainConfig,
270) -> Result<Vec<PartialMonitorResult>, FdarError> {
271 partial_data
272 .iter()
273 .map(|(values, n_obs)| spm_monitor_partial(chart, values, argvals, *n_obs, config))
274 .collect()
275}
276
277// -- Completion strategies ------------------------------------------------
278
279/// Conditional Expectation (BLUP) following Yao et al. (2005, section 3, Eq. 6):
280///
281/// The posterior score estimate under Gaussian assumptions is:
282/// xi_hat = Lambda · Phi_obs^T · (Phi_obs · Lambda · Phi_obs^T + sigma^2 I)^{-1} · y_obs
283///
284/// Implemented via the Woodbury identity as the equivalent small system:
285/// M · xi_hat = sigma^{-2} Phi_obs^T y_obs
286/// where M = Lambda^{-1} + sigma^{-2} Phi_obs^T Phi_obs (ncomp x ncomp).
287///
288/// Components:
289/// - Lambda = diag(eigenvalues) (ncomp x ncomp): prior score covariance
290/// - Phi_obs = rotation[0..n_observed, :] (n_observed x ncomp): eigenfunctions at observed points
291/// - y_obs = partial_values[0..n_observed] - mean[0..n_observed]: centered observations
292/// - sigma^2: measurement noise, estimated from the median Phase I SPE divided by m
293/// (robust to outliers; Yao et al. recommend median-based estimation)
294fn conditional_expectation(
295 chart: &SpmChart,
296 partial_values: &[f64],
297 _argvals: &[f64],
298 n_observed: usize,
299 ncomp: usize,
300 reg_eps: f64,
301) -> Result<(Vec<f64>, Option<Vec<f64>>), FdarError> {
302 let m = chart.fpca.mean.len();
303 let n_obs = n_observed.min(m);
304
305 // Centered observed values
306 let y_obs: Vec<f64> = (0..n_obs)
307 .map(|j| partial_values[j] - chart.fpca.mean[j])
308 .collect();
309
310 // Estimate sigma^2 from median SPE (robust to outliers, Yao et al. 2005)
311 let sigma2 = if !chart.spe_phase1.is_empty() {
312 let mut sorted_spe = chart.spe_phase1.clone();
313 sorted_spe.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
314 let mid = sorted_spe.len() / 2;
315 let median_spe = if sorted_spe.len() % 2 == 0 {
316 (sorted_spe[mid - 1] + sorted_spe[mid]) / 2.0
317 } else {
318 sorted_spe[mid]
319 };
320 // Normalize by grid size to get per-point variance
321 (median_spe / m as f64).max(1e-10)
322 } else {
323 1e-6
324 };
325
326 // Compute Phi_obs^T y_obs (ncomp vector)
327 let phi_t_y: Vec<f64> = (0..ncomp)
328 .map(|l| {
329 (0..n_obs)
330 .map(|j| chart.fpca.rotation[(j, l)] * y_obs[j])
331 .sum::<f64>()
332 })
333 .collect();
334
335 // Use the Woodbury identity to work with the small ncomp x ncomp system:
336 // M = Lambda^{-1} + sigma^{-2} Phi_obs^T Phi_obs
337 // scores = M^{-1} sigma^{-2} Phi_obs^T y_obs
338 let mut m_matrix = vec![0.0_f64; ncomp * ncomp];
339 for l in 0..ncomp {
340 // Diagonal: Lambda^{-1}
341 m_matrix[l * ncomp + l] += 1.0 / chart.eigenvalues[l];
342 // Add sigma^{-2} Phi^T Phi
343 for k in 0..ncomp {
344 let phi_t_phi: f64 = (0..n_obs)
345 .map(|j| chart.fpca.rotation[(j, l)] * chart.fpca.rotation[(j, k)])
346 .sum();
347 m_matrix[l * ncomp + k] += phi_t_phi / sigma2;
348 }
349 }
350
351 // Check condition number via diagonal ratio (cheap proxy)
352 let mut diag_min = f64::INFINITY;
353 let mut diag_max = 0.0_f64;
354 for l in 0..ncomp {
355 let d = m_matrix[l * ncomp + l];
356 if d > 0.0 {
357 diag_min = diag_min.min(d);
358 diag_max = diag_max.max(d);
359 }
360 }
361 if diag_max > 0.0 && diag_max / diag_min > 1e12 {
362 // Ill-conditioned: add Tikhonov regularization
363 let reg = diag_max * reg_eps;
364 for l in 0..ncomp {
365 m_matrix[l * ncomp + l] += reg;
366 }
367 }
368
369 // Right-hand side: sigma^{-2} Phi^T y
370 let rhs: Vec<f64> = phi_t_y.iter().map(|&v| v / sigma2).collect();
371
372 // Solve M * scores = rhs using Cholesky (M is SPD).
373 // If Cholesky fails (near-indefinite), retry with progressively stronger regularization.
374 let scores = match solve_spd(&m_matrix, &rhs, ncomp) {
375 Ok(s) => s,
376 Err(_) => {
377 // Retry with stronger Tikhonov regularization
378 let mut m_reg = m_matrix.clone();
379 let mut reg_strength = diag_max * 1e-8;
380 let mut result = None;
381 for _ in 0..5 {
382 for l in 0..ncomp {
383 m_reg[l * ncomp + l] = m_matrix[l * ncomp + l] + reg_strength;
384 }
385 if let Ok(s) = solve_spd(&m_reg, &rhs, ncomp) {
386 result = Some(s);
387 break;
388 }
389 reg_strength *= 10.0;
390 }
391 result.ok_or(FdarError::ComputationFailed {
392 operation: "conditional_expectation",
393 detail: "Cholesky failed even with strong regularization".to_string(),
394 })?
395 }
396 };
397
398 // Reconstruct the full curve: mean + Phi * scores
399 let mut completed = chart.fpca.mean.clone();
400 for j in 0..m {
401 for l in 0..ncomp {
402 completed[j] += chart.fpca.rotation[(j, l)] * scores[l];
403 }
404 }
405
406 Ok((scores, Some(completed)))
407}
408
409/// Partial projection: scale inner products by domain fraction.
410///
411/// The scaling factor (full_total / partial_total) compensates for the reduced
412/// integration domain, assuming the eigenfunction structure is approximately
413/// uniform across the domain. This is a first-order correction that degrades
414/// when eigenfunctions have localized support outside the observed region.
415fn partial_projection(
416 chart: &SpmChart,
417 partial_values: &[f64],
418 argvals: &[f64],
419 n_observed: usize,
420 ncomp: usize,
421) -> Result<Vec<f64>, FdarError> {
422 let m = chart.fpca.mean.len();
423 let n_obs = n_observed.min(m);
424
425 // Centered observed values
426 let y_obs: Vec<f64> = (0..n_obs)
427 .map(|j| partial_values[j] - chart.fpca.mean[j])
428 .collect();
429
430 // Integration weights for partial domain
431 let partialargvals = &argvals[..n_obs];
432 let weights = simpsons_weights(partialargvals);
433
434 // Full domain weights (for normalization)
435 let full_weights = simpsons_weights(argvals);
436 let full_total: f64 = full_weights.iter().sum();
437 let partial_total: f64 = weights.iter().sum();
438
439 // Scale factor: full_domain_width / partial_domain_width
440 let scale = if partial_total > 0.0 {
441 full_total / partial_total
442 } else {
443 1.0
444 };
445
446 // Compute scores as scaled partial inner products
447 let scores: Vec<f64> = (0..ncomp)
448 .map(|l| {
449 let ip: f64 = (0..n_obs)
450 .map(|j| y_obs[j] * chart.fpca.rotation[(j, l)] * weights[j])
451 .sum();
452 ip * scale
453 })
454 .collect();
455
456 Ok(scores)
457}
458
459/// Zero-pad: fill unobserved with mean, project normally.
460fn zero_pad(
461 chart: &SpmChart,
462 partial_values: &[f64],
463 _argvals: &[f64],
464 n_observed: usize,
465 ncomp: usize,
466) -> Result<(Vec<f64>, Option<Vec<f64>>), FdarError> {
467 let m = chart.fpca.mean.len();
468 let n_obs = n_observed.min(m);
469
470 // Build padded curve: observed values + mean for unobserved
471 let mut padded = chart.fpca.mean.clone();
472 padded[..n_obs].copy_from_slice(&partial_values[..n_obs]);
473
474 // Center and project
475 let mut centered = vec![0.0; m];
476 for j in 0..m {
477 centered[j] = padded[j] - chart.fpca.mean[j];
478 }
479
480 let scores: Vec<f64> = (0..ncomp)
481 .map(|l| {
482 (0..m)
483 .map(|j| centered[j] * chart.fpca.rotation[(j, l)])
484 .sum()
485 })
486 .collect();
487
488 Ok((scores, Some(padded)))
489}
490
491// -- Small linear algebra helpers -----------------------------------------
492
493/// Solve A*x = b where A is symmetric positive definite, via Cholesky.
494fn solve_spd(a: &[f64], b: &[f64], n: usize) -> Result<Vec<f64>, FdarError> {
495 // Cholesky factorization: A = L L^T
496 let mut l = vec![0.0_f64; n * n];
497
498 for j in 0..n {
499 let mut sum = 0.0;
500 for k in 0..j {
501 sum += l[j * n + k] * l[j * n + k];
502 }
503 let diag = a[j * n + j] - sum;
504 if diag <= 0.0 || diag.is_nan() {
505 return Err(FdarError::ComputationFailed {
506 operation: "cholesky",
507 detail: "matrix is not positive definite".to_string(),
508 });
509 }
510 l[j * n + j] = diag.sqrt();
511
512 for i in (j + 1)..n {
513 let mut sum = 0.0;
514 for k in 0..j {
515 sum += l[i * n + k] * l[j * n + k];
516 }
517 l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j];
518 }
519 }
520
521 // Forward substitution: L y = b
522 let mut y = vec![0.0_f64; n];
523 for i in 0..n {
524 let mut sum = 0.0;
525 for k in 0..i {
526 sum += l[i * n + k] * y[k];
527 }
528 y[i] = (b[i] - sum) / l[i * n + i];
529 }
530
531 // Back substitution: L^T x = y
532 let mut x = vec![0.0_f64; n];
533 for i in (0..n).rev() {
534 let mut sum = 0.0;
535 for k in (i + 1)..n {
536 sum += l[k * n + i] * x[k];
537 }
538 x[i] = (y[i] - sum) / l[i * n + i];
539 }
540
541 Ok(x)
542}