Skip to main content

fdars_core/spm/
partial.rs

1//! Partial-domain monitoring for functional data.
2//!
3//! Monitors processes where only a partial observation of the functional
4//! domain is available (e.g., real-time monitoring of an ongoing process).
5//! Supports three strategies for handling the unobserved domain:
6//! - Conditional expectation (BLUP)
7//! - Partial projection (scaled inner products)
8//! - Zero padding
9//!
10//! # Mathematical framework
11//!
12//! The BLUP (Best Linear Unbiased Predictor) conditional expectation formula is:
13//!
14//! xi_hat = Lambda · Phi_obs^T · (Phi_obs · Lambda · Phi_obs^T + sigma^2 I)^{-1} · y_obs
15//!
16//! where Lambda = diag(eigenvalues) is the ncomp x ncomp prior covariance of
17//! the FPC scores, Phi_obs is the eigenfunction matrix restricted to observed
18//! grid points (n_obs x ncomp), sigma^2 is estimated from the median SPE
19//! (robust to outliers), and y_obs is the centered partial observation. Under
20//! Gaussian assumptions, this is the BLUP (Yao et al., 2005, section 3,
21//! pp. 580--583, Eq. 6).
22//!
23//! The domain fraction is computed as (argvals\[n_obs-1\] - argvals\[0\]) /
24//! (argvals\[m-1\] - argvals\[0\]), representing the proportion of the domain
25//! range covered by observed points. For a single observed point (range = 0),
26//! the point-count fraction n_obs/m is used as a fallback, consistent with
27//! the PACE framework where prediction from a single observation reduces to
28//! the marginal BLUP.
29//!
30//! # Numerical stability
31//!
32//! The ncomp x ncomp system matrix Phi_obs · Lambda · Phi_obs^T + sigma^2 I
33//! (equivalently, Lambda^{-1} + sigma^{-2} Phi_obs^T Phi_obs via the Woodbury
34//! identity) is solved via Cholesky factorization. If the matrix is near-singular
35//! (condition number proxy diag_max/diag_min > 10^12), progressive Tikhonov
36//! regularization is applied: the diagonal loading is increased by factors of 10
37//! until the factorization succeeds (up to 5 retries).
38//!
39//! # References
40//!
41//! - Yao, F., Muller, H.-G. & Wang, J.-L. (2005). Functional data analysis
42//!   for sparse longitudinal data. *Journal of the American Statistical
43//!   Association*, 100(470), 577--590, section 3, pp. 580--583 (PACE framework
44//!   and BLUP derivation).
45
46use crate::error::FdarError;
47use crate::helpers::simpsons_weights;
48use crate::matrix::FdMatrix;
49
50use super::phase::SpmChart;
51use super::stats::hotelling_t2;
52
/// Strategy for handling the unobserved portion of the domain.
///
/// # Strategy comparison
///
/// - **ConditionalExpectation** (recommended): Best accuracy when the FPCA model
///   is well-specified. Uses BLUP to predict scores from partial observations.
///   Degrades gracefully as domain fraction decreases.
/// - **PartialProjection**: Computationally cheapest. Scales inner products by
///   domain fraction. Acceptable when domain fraction > 0.7.
/// - **ZeroPad**: Simplest baseline. Fills unobserved region with the mean
///   function. Biases scores toward zero for small domain fractions.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum DomainCompletion {
    /// Partial inner products scaled by domain fraction (no curve completion;
    /// the result's `completed_curve` is `None`).
    PartialProjection,
    /// Best Linear Unbiased Predictor of the FPC scores (Yao et al., 2005);
    /// also reconstructs the full curve as mean + Phi * scores.
    ConditionalExpectation,
    /// Pad the unobserved region with the mean function, then project
    /// normally over the full grid.
    ZeroPad,
}
74
/// Configuration for partial-domain monitoring.
///
/// For domain fractions below 0.3, all strategies produce increasingly
/// uncertain estimates. The conditional expectation (BLUP) degrades most
/// gracefully due to its optimal shrinkage properties.
#[derive(Debug, Clone, PartialEq)]
pub struct PartialDomainConfig {
    /// Number of principal components (default 5). Clamped at monitoring
    /// time to the number of eigenvalues available in the Phase I chart.
    pub ncomp: usize,
    /// Significance level (default 0.05).
    pub alpha: f64,
    /// Domain completion strategy (default: ConditionalExpectation).
    pub completion: DomainCompletion,
    /// Tikhonov regularization strength for ill-conditioned systems (default 1e-10).
    ///
    /// Applied as Tikhonov regularization (diagonal loading, scaled by the
    /// largest diagonal entry of the M matrix) when the M matrix condition
    /// number proxy exceeds 1e12. Increase to 1e-6 if you observe numerical
    /// warnings or NaN in scores.
    pub regularization_eps: f64,
}
95
96impl Default for PartialDomainConfig {
97    fn default() -> Self {
98        Self {
99            ncomp: 5,
100            alpha: 0.05,
101            completion: DomainCompletion::ConditionalExpectation,
102            regularization_eps: 1e-10,
103        }
104    }
105}
106
/// Result of partial-domain monitoring for a single observation.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct PartialMonitorResult {
    /// Estimated FPC scores (length = effective ncomp).
    pub scores: Vec<f64>,
    /// Hotelling T-squared statistic computed from the estimated scores.
    pub t2: f64,
    /// Whether T-squared exceeds the Phase I control limit (UCL).
    pub t2_alarm: bool,
    /// Fraction of the domain observed, in [0, 1] (range-based; falls back
    /// to point-count fraction when the observed range is zero).
    pub domain_fraction: f64,
    /// Completed curve on the full grid. `Some` for ConditionalExpectation
    /// and ZeroPad strategies; `None` for PartialProjection.
    pub completed_curve: Option<Vec<f64>>,
}
122
123/// Monitor a single partially-observed curve.
124///
125/// # Arguments
126/// * `chart` - Phase I SPM chart
127/// * `partial_values` - Observed values (length m, but only first `n_observed` are used)
128/// * `argvals` - Full grid points (length m)
129/// * `n_observed` - Number of observed grid points (from the start of the domain)
130/// * `config` - Partial domain configuration
131///
132/// For domain fractions below 0.3, conditional expectation accuracy degrades
133/// significantly. Consider collecting more of the domain or using a dedicated
134/// early-detection method.
135///
136/// # Example
137///
138/// ```
139/// use fdars_core::matrix::FdMatrix;
140/// use fdars_core::spm::phase::{spm_phase1, SpmConfig};
141/// use fdars_core::spm::partial::{spm_monitor_partial, PartialDomainConfig, DomainCompletion};
142/// let data = FdMatrix::from_column_major(
143///     (0..200).map(|i| (i as f64 * 0.1).sin()).collect(), 20, 10
144/// ).unwrap();
145/// let argvals: Vec<f64> = (0..10).map(|i| i as f64 / 9.0).collect();
146/// let chart = spm_phase1(&data, &argvals, &SpmConfig { ncomp: 2, ..SpmConfig::default() }).unwrap();
147/// let partial_values = vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0];
148/// let config = PartialDomainConfig { ncomp: 2, completion: DomainCompletion::ZeroPad, ..PartialDomainConfig::default() };
149/// let result = spm_monitor_partial(&chart, &partial_values, &argvals, 5, &config).unwrap();
150/// assert!(result.domain_fraction > 0.0);
151/// ```
152///
153/// # Errors
154///
155/// Returns [`FdarError::InvalidDimension`] if dimensions are inconsistent.
156/// Returns [`FdarError::InvalidParameter`] if `n_observed` is 0, if `argvals`
157/// is not sorted, or if the computed domain fraction is out of range.
158#[must_use = "monitoring result should not be discarded"]
159pub fn spm_monitor_partial(
160    chart: &SpmChart,
161    partial_values: &[f64],
162    argvals: &[f64],
163    n_observed: usize,
164    config: &PartialDomainConfig,
165) -> Result<PartialMonitorResult, FdarError> {
166    let m = chart.fpca.mean.len();
167    if argvals.len() != m {
168        return Err(FdarError::InvalidDimension {
169            parameter: "argvals",
170            expected: format!("{m}"),
171            actual: format!("{}", argvals.len()),
172        });
173    }
174    if partial_values.len() < n_observed {
175        return Err(FdarError::InvalidDimension {
176            parameter: "partial_values",
177            expected: format!("at least {n_observed} values"),
178            actual: format!("{} values", partial_values.len()),
179        });
180    }
181    if n_observed == 0 {
182        return Err(FdarError::InvalidParameter {
183            parameter: "n_observed",
184            message: "n_observed must be at least 1".to_string(),
185        });
186    }
187    // Ensure argvals is sorted (strictly increasing endpoints)
188    if m > 1 && argvals[0] >= argvals[m - 1] {
189        return Err(FdarError::InvalidParameter {
190            parameter: "argvals",
191            message: "argvals must be sorted (first element must be less than last)".to_string(),
192        });
193    }
194
195    let ncomp = config.ncomp.min(chart.eigenvalues.len());
196    // Domain fraction: ratio of observed domain range to full range.
197    // For n_observed=1, the range is zero so we fall back to point-count fraction.
198    let domain_fraction = if m <= 1 {
199        1.0
200    } else {
201        let range_fraction =
202            (argvals[n_observed.min(m) - 1] - argvals[0]) / (argvals[m - 1] - argvals[0]);
203        if range_fraction > 0.0 {
204            range_fraction
205        } else {
206            // Single observed point: use point-count fraction as fallback.
207            // The point-count fraction n_obs/m is used when the range-based
208            // fraction is zero (single observed point). This is consistent
209            // with the PACE framework (Yao et al., 2005) where prediction
210            // from a single observation reduces to the marginal BLUP.
211            n_observed as f64 / m as f64
212        }
213    };
214    if !(0.0..=1.0).contains(&domain_fraction) {
215        return Err(FdarError::InvalidParameter {
216            parameter: "domain_fraction",
217            message: format!("computed domain_fraction must be in [0, 1], got {domain_fraction}"),
218        });
219    }
220
221    let (scores, completed_curve) = match &config.completion {
222        DomainCompletion::ConditionalExpectation => conditional_expectation(
223            chart,
224            partial_values,
225            argvals,
226            n_observed,
227            ncomp,
228            config.regularization_eps,
229        )?,
230        DomainCompletion::PartialProjection => {
231            let scores = partial_projection(chart, partial_values, argvals, n_observed, ncomp)?;
232            (scores, None)
233        }
234        DomainCompletion::ZeroPad => zero_pad(chart, partial_values, argvals, n_observed, ncomp)?,
235    };
236
237    // Compute T-squared
238    let eigenvalues = &chart.eigenvalues[..ncomp];
239    let score_mat = FdMatrix::from_column_major(scores.clone(), 1, ncomp)?;
240    let t2_vec = hotelling_t2(&score_mat, eigenvalues)?;
241    let t2 = t2_vec[0];
242    let t2_alarm = t2 > chart.t2_limit.ucl;
243
244    Ok(PartialMonitorResult {
245        scores,
246        t2,
247        t2_alarm,
248        domain_fraction,
249        completed_curve,
250    })
251}
252
253/// Monitor a batch of partially-observed curves.
254///
255/// # Arguments
256/// * `chart` - Phase I SPM chart
257/// * `partial_data` - Slice of (values, n_observed) pairs
258/// * `argvals` - Full grid points (length m)
259/// * `config` - Partial domain configuration
260///
261/// # Errors
262///
263/// Returns errors from individual monitoring calls.
264#[must_use = "monitoring results should not be discarded"]
265pub fn spm_monitor_partial_batch(
266    chart: &SpmChart,
267    partial_data: &[(&[f64], usize)],
268    argvals: &[f64],
269    config: &PartialDomainConfig,
270) -> Result<Vec<PartialMonitorResult>, FdarError> {
271    partial_data
272        .iter()
273        .map(|(values, n_obs)| spm_monitor_partial(chart, values, argvals, *n_obs, config))
274        .collect()
275}
276
277// -- Completion strategies ------------------------------------------------
278
279/// Conditional Expectation (BLUP) following Yao et al. (2005, section 3, Eq. 6):
280///
281/// The posterior score estimate under Gaussian assumptions is:
282///   xi_hat = Lambda · Phi_obs^T · (Phi_obs · Lambda · Phi_obs^T + sigma^2 I)^{-1} · y_obs
283///
284/// Implemented via the Woodbury identity as the equivalent small system:
285///   M · xi_hat = sigma^{-2} Phi_obs^T y_obs
286/// where M = Lambda^{-1} + sigma^{-2} Phi_obs^T Phi_obs (ncomp x ncomp).
287///
288/// Components:
289/// - Lambda = diag(eigenvalues) (ncomp x ncomp): prior score covariance
290/// - Phi_obs = rotation[0..n_observed, :] (n_observed x ncomp): eigenfunctions at observed points
291/// - y_obs = partial_values[0..n_observed] - mean[0..n_observed]: centered observations
292/// - sigma^2: measurement noise, estimated from the median Phase I SPE divided by m
293///   (robust to outliers; Yao et al. recommend median-based estimation)
294fn conditional_expectation(
295    chart: &SpmChart,
296    partial_values: &[f64],
297    _argvals: &[f64],
298    n_observed: usize,
299    ncomp: usize,
300    reg_eps: f64,
301) -> Result<(Vec<f64>, Option<Vec<f64>>), FdarError> {
302    let m = chart.fpca.mean.len();
303    let n_obs = n_observed.min(m);
304
305    // Centered observed values
306    let y_obs: Vec<f64> = (0..n_obs)
307        .map(|j| partial_values[j] - chart.fpca.mean[j])
308        .collect();
309
310    // Estimate sigma^2 from median SPE (robust to outliers, Yao et al. 2005)
311    let sigma2 = if !chart.spe_phase1.is_empty() {
312        let mut sorted_spe = chart.spe_phase1.clone();
313        sorted_spe.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
314        let mid = sorted_spe.len() / 2;
315        let median_spe = if sorted_spe.len() % 2 == 0 {
316            (sorted_spe[mid - 1] + sorted_spe[mid]) / 2.0
317        } else {
318            sorted_spe[mid]
319        };
320        // Normalize by grid size to get per-point variance
321        (median_spe / m as f64).max(1e-10)
322    } else {
323        1e-6
324    };
325
326    // Compute Phi_obs^T y_obs (ncomp vector)
327    let phi_t_y: Vec<f64> = (0..ncomp)
328        .map(|l| {
329            (0..n_obs)
330                .map(|j| chart.fpca.rotation[(j, l)] * y_obs[j])
331                .sum::<f64>()
332        })
333        .collect();
334
335    // Use the Woodbury identity to work with the small ncomp x ncomp system:
336    // M = Lambda^{-1} + sigma^{-2} Phi_obs^T Phi_obs
337    // scores = M^{-1} sigma^{-2} Phi_obs^T y_obs
338    let mut m_matrix = vec![0.0_f64; ncomp * ncomp];
339    for l in 0..ncomp {
340        // Diagonal: Lambda^{-1}
341        m_matrix[l * ncomp + l] += 1.0 / chart.eigenvalues[l];
342        // Add sigma^{-2} Phi^T Phi
343        for k in 0..ncomp {
344            let phi_t_phi: f64 = (0..n_obs)
345                .map(|j| chart.fpca.rotation[(j, l)] * chart.fpca.rotation[(j, k)])
346                .sum();
347            m_matrix[l * ncomp + k] += phi_t_phi / sigma2;
348        }
349    }
350
351    // Check condition number via diagonal ratio (cheap proxy)
352    let mut diag_min = f64::INFINITY;
353    let mut diag_max = 0.0_f64;
354    for l in 0..ncomp {
355        let d = m_matrix[l * ncomp + l];
356        if d > 0.0 {
357            diag_min = diag_min.min(d);
358            diag_max = diag_max.max(d);
359        }
360    }
361    if diag_max > 0.0 && diag_max / diag_min > 1e12 {
362        // Ill-conditioned: add Tikhonov regularization
363        let reg = diag_max * reg_eps;
364        for l in 0..ncomp {
365            m_matrix[l * ncomp + l] += reg;
366        }
367    }
368
369    // Right-hand side: sigma^{-2} Phi^T y
370    let rhs: Vec<f64> = phi_t_y.iter().map(|&v| v / sigma2).collect();
371
372    // Solve M * scores = rhs using Cholesky (M is SPD).
373    // If Cholesky fails (near-indefinite), retry with progressively stronger regularization.
374    let scores = match solve_spd(&m_matrix, &rhs, ncomp) {
375        Ok(s) => s,
376        Err(_) => {
377            // Retry with stronger Tikhonov regularization
378            let mut m_reg = m_matrix.clone();
379            let mut reg_strength = diag_max * 1e-8;
380            let mut result = None;
381            for _ in 0..5 {
382                for l in 0..ncomp {
383                    m_reg[l * ncomp + l] = m_matrix[l * ncomp + l] + reg_strength;
384                }
385                if let Ok(s) = solve_spd(&m_reg, &rhs, ncomp) {
386                    result = Some(s);
387                    break;
388                }
389                reg_strength *= 10.0;
390            }
391            result.ok_or(FdarError::ComputationFailed {
392                operation: "conditional_expectation",
393                detail: "Cholesky failed even with strong regularization".to_string(),
394            })?
395        }
396    };
397
398    // Reconstruct the full curve: mean + Phi * scores
399    let mut completed = chart.fpca.mean.clone();
400    for j in 0..m {
401        for l in 0..ncomp {
402            completed[j] += chart.fpca.rotation[(j, l)] * scores[l];
403        }
404    }
405
406    Ok((scores, Some(completed)))
407}
408
409/// Partial projection: scale inner products by domain fraction.
410///
411/// The scaling factor (full_total / partial_total) compensates for the reduced
412/// integration domain, assuming the eigenfunction structure is approximately
413/// uniform across the domain. This is a first-order correction that degrades
414/// when eigenfunctions have localized support outside the observed region.
415fn partial_projection(
416    chart: &SpmChart,
417    partial_values: &[f64],
418    argvals: &[f64],
419    n_observed: usize,
420    ncomp: usize,
421) -> Result<Vec<f64>, FdarError> {
422    let m = chart.fpca.mean.len();
423    let n_obs = n_observed.min(m);
424
425    // Centered observed values
426    let y_obs: Vec<f64> = (0..n_obs)
427        .map(|j| partial_values[j] - chart.fpca.mean[j])
428        .collect();
429
430    // Integration weights for partial domain
431    let partialargvals = &argvals[..n_obs];
432    let weights = simpsons_weights(partialargvals);
433
434    // Full domain weights (for normalization)
435    let full_weights = simpsons_weights(argvals);
436    let full_total: f64 = full_weights.iter().sum();
437    let partial_total: f64 = weights.iter().sum();
438
439    // Scale factor: full_domain_width / partial_domain_width
440    let scale = if partial_total > 0.0 {
441        full_total / partial_total
442    } else {
443        1.0
444    };
445
446    // Compute scores as scaled partial inner products
447    let scores: Vec<f64> = (0..ncomp)
448        .map(|l| {
449            let ip: f64 = (0..n_obs)
450                .map(|j| y_obs[j] * chart.fpca.rotation[(j, l)] * weights[j])
451                .sum();
452            ip * scale
453        })
454        .collect();
455
456    Ok(scores)
457}
458
459/// Zero-pad: fill unobserved with mean, project normally.
460fn zero_pad(
461    chart: &SpmChart,
462    partial_values: &[f64],
463    _argvals: &[f64],
464    n_observed: usize,
465    ncomp: usize,
466) -> Result<(Vec<f64>, Option<Vec<f64>>), FdarError> {
467    let m = chart.fpca.mean.len();
468    let n_obs = n_observed.min(m);
469
470    // Build padded curve: observed values + mean for unobserved
471    let mut padded = chart.fpca.mean.clone();
472    padded[..n_obs].copy_from_slice(&partial_values[..n_obs]);
473
474    // Center and project
475    let mut centered = vec![0.0; m];
476    for j in 0..m {
477        centered[j] = padded[j] - chart.fpca.mean[j];
478    }
479
480    let scores: Vec<f64> = (0..ncomp)
481        .map(|l| {
482            (0..m)
483                .map(|j| centered[j] * chart.fpca.rotation[(j, l)])
484                .sum()
485        })
486        .collect();
487
488    Ok((scores, Some(padded)))
489}
490
491// -- Small linear algebra helpers -----------------------------------------
492
493/// Solve A*x = b where A is symmetric positive definite, via Cholesky.
494fn solve_spd(a: &[f64], b: &[f64], n: usize) -> Result<Vec<f64>, FdarError> {
495    // Cholesky factorization: A = L L^T
496    let mut l = vec![0.0_f64; n * n];
497
498    for j in 0..n {
499        let mut sum = 0.0;
500        for k in 0..j {
501            sum += l[j * n + k] * l[j * n + k];
502        }
503        let diag = a[j * n + j] - sum;
504        if diag <= 0.0 || diag.is_nan() {
505            return Err(FdarError::ComputationFailed {
506                operation: "cholesky",
507                detail: "matrix is not positive definite".to_string(),
508            });
509        }
510        l[j * n + j] = diag.sqrt();
511
512        for i in (j + 1)..n {
513            let mut sum = 0.0;
514            for k in 0..j {
515                sum += l[i * n + k] * l[j * n + k];
516            }
517            l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j];
518        }
519    }
520
521    // Forward substitution: L y = b
522    let mut y = vec![0.0_f64; n];
523    for i in 0..n {
524        let mut sum = 0.0;
525        for k in 0..i {
526            sum += l[i * n + k] * y[k];
527        }
528        y[i] = (b[i] - sum) / l[i * n + i];
529    }
530
531    // Back substitution: L^T x = y
532    let mut x = vec![0.0_f64; n];
533    for i in (0..n).rev() {
534        let mut sum = 0.0;
535        for k in (i + 1)..n {
536            sum += l[k * n + i] * x[k];
537        }
538        x[i] = (y[i] - sum) / l[i * n + i];
539    }
540
541    Ok(x)
542}