fdars_core/irreg_fdata.rs

//! Irregular functional data operations.
//!
//! This module provides data structures and algorithms for functional data
//! where observations have different evaluation points (irregular/sparse sampling).
//!
//! ## Storage Format
//!
//! Uses a CSR-like (Compressed Sparse Row) format for efficient storage:
//! - `offsets[i]..offsets[i+1]` gives the slice indices for observation i
//! - `argvals` and `values` store all data contiguously
//!
//! This format is memory-efficient and enables parallel processing of observations.
//!
//! ## Example
//!
//! For 3 curves with varying numbers of observation points:
//! - Curve 0: 5 points
//! - Curve 1: 3 points
//! - Curve 2: 7 points
//!
//! The offsets would be: [0, 5, 8, 15]
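//!
//! A minimal sketch of building and inspecting that layout (the doctest assumes
//! this module is exported as `fdars_core::irreg_fdata`):
//!
//! ```
//! use fdars_core::irreg_fdata::IrregFdata;
//!
//! // Three curves with 5, 3, and 7 points on [0, 1].
//! let argvals_list: Vec<Vec<f64>> = vec![
//!     (0..5).map(|k| k as f64 / 4.0).collect(),
//!     (0..3).map(|k| k as f64 / 2.0).collect(),
//!     (0..7).map(|k| k as f64 / 6.0).collect(),
//! ];
//! let values_list: Vec<Vec<f64>> = argvals_list
//!     .iter()
//!     .map(|t| t.iter().map(|&ti| ti * ti).collect())
//!     .collect();
//!
//! let ifd = IrregFdata::from_lists(&argvals_list, &values_list);
//! assert_eq!(ifd.offsets, vec![0, 5, 8, 15]);
//! assert_eq!(ifd.n_obs(), 3);
//! ```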

use rayon::prelude::*;

/// Compressed storage for irregular functional data.
///
/// Uses CSR-style layout where each observation can have a different
/// number of evaluation points.
#[derive(Clone, Debug)]
pub struct IrregFdata {
    /// Start indices for each observation (length n+1)
    /// `offsets[i]..offsets[i+1]` gives the range for observation i
    pub offsets: Vec<usize>,
    /// All observation points concatenated
    pub argvals: Vec<f64>,
    /// All values concatenated
    pub values: Vec<f64>,
    /// Domain range `[min, max]`
    pub rangeval: [f64; 2],
}

impl IrregFdata {
    /// Create from lists of argvals and values (one per observation).
    ///
    /// The points within each observation are assumed to be sorted in
    /// increasing order; the domain range is taken from the first and last
    /// point of each curve.
    ///
    /// # Arguments
    /// * `argvals_list` - List of observation point vectors
    /// * `values_list` - List of value vectors (same lengths as `argvals_list`)
    ///
    /// # Panics
    /// Panics if the lists have different lengths or if any pair has mismatched lengths.
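    ///
    /// # Example
    ///
    /// A small sketch of the resulting range (doctest assumes the module is
    /// exported as `fdars_core::irreg_fdata`):
    ///
    /// ```
    /// use fdars_core::irreg_fdata::IrregFdata;
    ///
    /// let ifd = IrregFdata::from_lists(
    ///     &[vec![0.0, 0.5, 1.0], vec![0.25, 0.75]],
    ///     &[vec![1.0, 2.0, 3.0], vec![1.5, 2.5]],
    /// );
    /// // rangeval spans the smallest first point and largest last point.
    /// assert_eq!(ifd.rangeval, [0.0, 1.0]);
    /// assert_eq!(ifd.obs_counts(), vec![3, 2]);
    /// ```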
    pub fn from_lists(argvals_list: &[Vec<f64>], values_list: &[Vec<f64>]) -> Self {
        let n = argvals_list.len();
        assert_eq!(
            n,
            values_list.len(),
            "argvals_list and values_list must have same length"
        );

        let mut offsets = Vec::with_capacity(n + 1);
        offsets.push(0);

        let total_points: usize = argvals_list.iter().map(|v| v.len()).sum();
        let mut argvals = Vec::with_capacity(total_points);
        let mut values = Vec::with_capacity(total_points);

        let mut range_min = f64::INFINITY;
        let mut range_max = f64::NEG_INFINITY;

        for i in 0..n {
            assert_eq!(
                argvals_list[i].len(),
                values_list[i].len(),
                "Observation {} has mismatched argvals/values lengths",
                i
            );

            argvals.extend_from_slice(&argvals_list[i]);
            values.extend_from_slice(&values_list[i]);
            offsets.push(argvals.len());

            if let (Some(&min), Some(&max)) = (argvals_list[i].first(), argvals_list[i].last()) {
                range_min = range_min.min(min);
                range_max = range_max.max(max);
            }
        }

        IrregFdata {
            offsets,
            argvals,
            values,
            rangeval: [range_min, range_max],
        }
    }

    /// Create from flattened representation (for R interop).
    ///
    /// # Arguments
    /// * `offsets` - Start indices (length n+1)
    /// * `argvals` - All observation points concatenated
    /// * `values` - All values concatenated
    /// * `rangeval` - Domain range `[min, max]`
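    ///
    /// # Example
    ///
    /// A minimal sketch of the flat layout (doctest assumes the module is
    /// exported as `fdars_core::irreg_fdata`):
    ///
    /// ```
    /// use fdars_core::irreg_fdata::IrregFdata;
    ///
    /// // Two curves: 3 points, then 2 points, stored contiguously.
    /// let ifd = IrregFdata::from_flat(
    ///     vec![0, 3, 5],
    ///     vec![0.0, 0.5, 1.0, 0.0, 1.0],
    ///     vec![1.0, 2.0, 3.0, 1.0, 3.0],
    ///     [0.0, 1.0],
    /// );
    /// assert_eq!(ifd.get_obs(1), (&[0.0, 1.0][..], &[1.0, 3.0][..]));
    /// ```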
    pub fn from_flat(
        offsets: Vec<usize>,
        argvals: Vec<f64>,
        values: Vec<f64>,
        rangeval: [f64; 2],
    ) -> Self {
        IrregFdata {
            offsets,
            argvals,
            values,
            rangeval,
        }
    }

    /// Number of observations.
    #[inline]
    pub fn n_obs(&self) -> usize {
        self.offsets.len().saturating_sub(1)
    }

    /// Number of points for observation i.
    #[inline]
    pub fn n_points(&self, i: usize) -> usize {
        self.offsets[i + 1] - self.offsets[i]
    }

    /// Get observation i as a pair of slices (argvals, values).
    #[inline]
    pub fn get_obs(&self, i: usize) -> (&[f64], &[f64]) {
        let start = self.offsets[i];
        let end = self.offsets[i + 1];
        (&self.argvals[start..end], &self.values[start..end])
    }

    /// Total number of observation points across all curves.
    #[inline]
    pub fn total_points(&self) -> usize {
        self.argvals.len()
    }

    /// Number of points for each observation.
    pub fn obs_counts(&self) -> Vec<usize> {
        (0..self.n_obs()).map(|i| self.n_points(i)).collect()
    }

    /// Minimum number of points per observation.
    pub fn min_obs(&self) -> usize {
        (0..self.n_obs())
            .map(|i| self.n_points(i))
            .min()
            .unwrap_or(0)
    }

    /// Maximum number of points per observation.
    pub fn max_obs(&self) -> usize {
        (0..self.n_obs())
            .map(|i| self.n_points(i))
            .max()
            .unwrap_or(0)
    }
}

// =============================================================================
// Integration
// =============================================================================

/// Compute the integral of each curve using the trapezoidal rule.
///
/// For curve i with observation points t_1, ..., t_m and values x_1, ..., x_m:
/// ∫f_i(t)dt ≈ Σ_{j=1}^{m-1} (t_{j+1} - t_j) * (x_{j+1} + x_j) / 2
///
/// # Arguments
/// * `offsets` - Start indices (length n+1)
/// * `argvals` - All observation points concatenated
/// * `values` - All values concatenated
///
/// # Returns
/// Vector of integrals, one per curve
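///
/// # Example
///
/// A short sketch of the expected output (doctest assumes the module is
/// exported as `fdars_core::irreg_fdata`):
///
/// ```
/// use fdars_core::irreg_fdata::integrate_irreg;
///
/// // One curve: f(t) = 2t on [0, 1], so the integral is 1.
/// let offsets = vec![0, 3];
/// let argvals = vec![0.0, 0.5, 1.0];
/// let values = vec![0.0, 1.0, 2.0];
///
/// let integrals = integrate_irreg(&offsets, &argvals, &values);
/// assert!((integrals[0] - 1.0).abs() < 1e-10);
/// ```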
pub fn integrate_irreg(offsets: &[usize], argvals: &[f64], values: &[f64]) -> Vec<f64> {
    let n = offsets.len() - 1;

    (0..n)
        .into_par_iter()
        .map(|i| {
            let start = offsets[i];
            let end = offsets[i + 1];
            let t = &argvals[start..end];
            let x = &values[start..end];

            if t.len() < 2 {
                return 0.0;
            }

            let mut integral = 0.0;
            for j in 1..t.len() {
                let h = t[j] - t[j - 1];
                integral += 0.5 * h * (x[j] + x[j - 1]);
            }
            integral
        })
        .collect()
}

/// Compute integral for IrregFdata struct.
pub fn integrate_irreg_struct(ifd: &IrregFdata) -> Vec<f64> {
    integrate_irreg(&ifd.offsets, &ifd.argvals, &ifd.values)
}

// =============================================================================
// Norms
// =============================================================================

/// Compute the Lp norm of each curve in irregular functional data.
///
/// ||f_i||_p = (∫|f_i(t)|^p dt)^{1/p}
///
/// Uses the trapezoidal rule for integration.
///
/// # Arguments
/// * `offsets` - Start indices (length n+1)
/// * `argvals` - All observation points concatenated
/// * `values` - All values concatenated
/// * `p` - Norm order (p >= 1)
///
/// # Returns
/// Vector of norms, one per curve
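///
/// # Example
///
/// A short sketch (doctest assumes the module is exported as
/// `fdars_core::irreg_fdata`):
///
/// ```
/// use fdars_core::irreg_fdata::norm_lp_irreg;
///
/// // L2 norm of the constant function f ≡ 3 on [0, 1] is 3.
/// let offsets = vec![0, 3];
/// let argvals = vec![0.0, 0.5, 1.0];
/// let values = vec![3.0, 3.0, 3.0];
///
/// let norms = norm_lp_irreg(&offsets, &argvals, &values, 2.0);
/// assert!((norms[0] - 3.0).abs() < 1e-10);
/// ```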
pub fn norm_lp_irreg(offsets: &[usize], argvals: &[f64], values: &[f64], p: f64) -> Vec<f64> {
    let n = offsets.len() - 1;

    (0..n)
        .into_par_iter()
        .map(|i| {
            let start = offsets[i];
            let end = offsets[i + 1];
            let t = &argvals[start..end];
            let x = &values[start..end];

            if t.len() < 2 {
                return 0.0;
            }

            let mut integral = 0.0;
            for j in 1..t.len() {
                let h = t[j] - t[j - 1];
                let val_left = x[j - 1].abs().powf(p);
                let val_right = x[j].abs().powf(p);
                integral += 0.5 * h * (val_left + val_right);
            }
            integral.powf(1.0 / p)
        })
        .collect()
}

// =============================================================================
// Mean Estimation
// =============================================================================

/// Epanechnikov kernel function.
#[inline]
fn kernel_epanechnikov(u: f64) -> f64 {
    if u.abs() <= 1.0 {
        0.75 * (1.0 - u * u)
    } else {
        0.0
    }
}

/// Gaussian kernel function.
#[inline]
fn kernel_gaussian(u: f64) -> f64 {
    (-0.5 * u * u).exp() / (2.0 * std::f64::consts::PI).sqrt()
}

/// Estimate the mean function at specified target points using kernel smoothing.
///
/// Uses local weighted averaging (Nadaraya-Watson estimator) at each target point:
/// μ̂(t) = Σ_{i,j} K_h(t - t_{ij}) x_{ij} / Σ_{i,j} K_h(t - t_{ij})
///
/// # Arguments
/// * `offsets` - Start indices (length n+1)
/// * `argvals` - All observation points concatenated
/// * `values` - All values concatenated
/// * `target_argvals` - Points at which to estimate the mean
/// * `bandwidth` - Kernel bandwidth
/// * `kernel_type` - 0 for Epanechnikov, any other value for Gaussian
///
/// # Returns
/// Estimated mean function values at target points; a target point with zero
/// total kernel weight yields NaN
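///
/// # Example
///
/// A short sketch with two identical curves (doctest assumes the module is
/// exported as `fdars_core::irreg_fdata`):
///
/// ```
/// use fdars_core::irreg_fdata::mean_irreg;
///
/// // Two copies of the same curve; the smoothed mean stays near those values.
/// let offsets = vec![0, 3, 6];
/// let argvals = vec![0.0, 0.5, 1.0, 0.0, 0.5, 1.0];
/// let values = vec![0.0, 1.0, 2.0, 0.0, 1.0, 2.0];
///
/// let mean = mean_irreg(&offsets, &argvals, &values, &[0.5], 0.25, 1);
/// assert!((mean[0] - 1.0).abs() < 0.5);
/// ```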
pub fn mean_irreg(
    offsets: &[usize],
    argvals: &[f64],
    values: &[f64],
    target_argvals: &[f64],
    bandwidth: f64,
    kernel_type: i32,
) -> Vec<f64> {
    let n = offsets.len() - 1;
    let kernel = match kernel_type {
        0 => kernel_epanechnikov,
        _ => kernel_gaussian,
    };

    target_argvals
        .par_iter()
        .map(|&t| {
            let mut sum_weights = 0.0;
            let mut sum_values = 0.0;

            for i in 0..n {
                let start = offsets[i];
                let end = offsets[i + 1];
                let obs_t = &argvals[start..end];
                let obs_x = &values[start..end];

                for (&ti, &xi) in obs_t.iter().zip(obs_x.iter()) {
                    let u = (ti - t) / bandwidth;
                    let w = kernel(u);
                    sum_weights += w;
                    sum_values += w * xi;
                }
            }

            if sum_weights > 0.0 {
                sum_values / sum_weights
            } else {
                f64::NAN
            }
        })
        .collect()
}

// =============================================================================
// Covariance Estimation
// =============================================================================

/// Estimate the covariance surface on a grid of points using kernel smoothing.
///
/// The mean is first estimated at all observation points with `mean_irreg`
/// (Gaussian kernel), values are centered, and the covariance at each (s, t)
/// pair is a Nadaraya-Watson-style weighted average of within-curve
/// cross-products using a Gaussian product kernel.
///
/// # Arguments
/// * `offsets` - Start indices (length n+1)
/// * `argvals` - All observation points concatenated
/// * `values` - All values concatenated
/// * `s_grid` - First grid points for covariance
/// * `t_grid` - Second grid points for covariance
/// * `bandwidth` - Kernel bandwidth
///
/// # Returns
/// Covariance estimate at the (s_grid, t_grid) points as an
/// `s_grid.len() × t_grid.len()` matrix in column-major order
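///
/// # Example
///
/// A short sketch checking the output shape and symmetry (doctest assumes the
/// module is exported as `fdars_core::irreg_fdata`):
///
/// ```
/// use fdars_core::irreg_fdata::cov_irreg;
///
/// // Two curves observed at slightly different points.
/// let offsets = vec![0, 3, 5];
/// let argvals = vec![0.0, 0.5, 1.0, 0.25, 0.75];
/// let values = vec![1.0, 2.0, 3.0, 0.0, 1.0];
///
/// let grid = vec![0.0, 0.5, 1.0];
/// let cov = cov_irreg(&offsets, &argvals, &values, &grid, &grid, 0.5);
///
/// assert_eq!(cov.len(), grid.len() * grid.len());
/// // With identical s and t grids the estimate is symmetric.
/// let n = grid.len();
/// for i in 0..n {
///     for j in 0..n {
///         assert!((cov[i + j * n] - cov[j + i * n]).abs() < 1e-12);
///     }
/// }
/// ```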
pub fn cov_irreg(
    offsets: &[usize],
    argvals: &[f64],
    values: &[f64],
    s_grid: &[f64],
    t_grid: &[f64],
    bandwidth: f64,
) -> Vec<f64> {
    let n = offsets.len() - 1;
    let ns = s_grid.len();
    let nt = t_grid.len();

    // First estimate mean at all observation points
    let mean_at_obs = mean_irreg(offsets, argvals, values, argvals, bandwidth, 1);

    // Centered values
    let centered: Vec<f64> = values
        .iter()
        .zip(mean_at_obs.iter())
        .map(|(&v, &m)| v - m)
        .collect();

    // Estimate covariance at each (s, t) pair
    let mut cov = vec![0.0; ns * nt];

    for (si, &s) in s_grid.iter().enumerate() {
        for (ti, &t) in t_grid.iter().enumerate() {
            let mut sum_weights = 0.0;
            let mut sum_products = 0.0;

            // Sum over all pairs of observations within each curve
            for i in 0..n {
                let start = offsets[i];
                let end = offsets[i + 1];
                let obs_t = &argvals[start..end];
                let obs_c = &centered[start..end];

                for j1 in 0..obs_t.len() {
                    for j2 in 0..obs_t.len() {
                        let u1 = (obs_t[j1] - s) / bandwidth;
                        let u2 = (obs_t[j2] - t) / bandwidth;

                        let w1 = kernel_gaussian(u1);
                        let w2 = kernel_gaussian(u2);
                        let w = w1 * w2;

                        sum_weights += w;
                        sum_products += w * obs_c[j1] * obs_c[j2];
                    }
                }
            }

            if sum_weights > 0.0 {
                cov[si + ti * ns] = sum_products / sum_weights;
            }
        }
    }

    cov
}

// =============================================================================
// Distance Metrics
// =============================================================================

/// Compute the Lp distance between two irregular curves.
///
/// Uses the union of observation points restricted to the common range, with
/// linear interpolation of both curves. Returns NaN when either curve is empty
/// or the overlapping range contains fewer than two points.
fn lp_distance_pair(t1: &[f64], x1: &[f64], t2: &[f64], x2: &[f64], p: f64) -> f64 {
    // Guard against empty curves (the range computation below would panic).
    if t1.is_empty() || t2.is_empty() {
        return f64::NAN;
    }

    // Create union of time points
    let mut all_t: Vec<f64> = t1.iter().chain(t2.iter()).copied().collect();
    all_t.sort_by(|a, b| a.partial_cmp(b).unwrap());
    all_t.dedup();

    // Filter to common range
    let t_min = t1.first().unwrap().max(*t2.first().unwrap());
    let t_max = t1.last().unwrap().min(*t2.last().unwrap());

    let common_t: Vec<f64> = all_t
        .into_iter()
        .filter(|&t| t >= t_min && t <= t_max)
        .collect();

    if common_t.len() < 2 {
        return f64::NAN;
    }

    // Interpolate both curves at common points
    let y1: Vec<f64> = common_t.iter().map(|&t| linear_interp(t1, x1, t)).collect();
    let y2: Vec<f64> = common_t.iter().map(|&t| linear_interp(t2, x2, t)).collect();

    // Compute integral of |y1 - y2|^p
    let mut integral = 0.0;
    for j in 1..common_t.len() {
        let h = common_t[j] - common_t[j - 1];
        let val_left = (y1[j - 1] - y2[j - 1]).abs().powf(p);
        let val_right = (y1[j] - y2[j]).abs().powf(p);
        integral += 0.5 * h * (val_left + val_right);
    }

    integral.powf(1.0 / p)
}

/// Linear interpolation at point t.
///
/// Assumes `argvals` is sorted in increasing order; points outside the
/// observed range are clamped to the endpoint values.
fn linear_interp(argvals: &[f64], values: &[f64], t: f64) -> f64 {
    if t <= argvals[0] {
        return values[0];
    }
    if t >= argvals[argvals.len() - 1] {
        return values[values.len() - 1];
    }

    // Find the interval
    let idx = argvals.iter().position(|&x| x > t).unwrap();
    let t0 = argvals[idx - 1];
    let t1 = argvals[idx];
    let x0 = values[idx - 1];
    let x1 = values[idx];

    // Linear interpolation
    x0 + (x1 - x0) * (t - t0) / (t1 - t0)
}

/// Compute pairwise Lp distances for irregular functional data.
///
/// Pairs of curves whose observation ranges do not overlap yield NaN.
///
/// # Arguments
/// * `offsets` - Start indices (length n+1)
/// * `argvals` - All observation points concatenated
/// * `values` - All values concatenated
/// * `p` - Norm order
///
/// # Returns
/// Distance matrix (n × n) in column-major format
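///
/// # Example
///
/// A short sketch with two curves on the same grid (doctest assumes the module
/// is exported as `fdars_core::irreg_fdata`):
///
/// ```
/// use fdars_core::irreg_fdata::metric_lp_irreg;
///
/// // Two constant curves, f ≡ 1 and f ≡ 3, observed on [0, 1]:
/// // the L2 distance is 2.
/// let offsets = vec![0, 3, 6];
/// let argvals = vec![0.0, 0.5, 1.0, 0.0, 0.5, 1.0];
/// let values = vec![1.0, 1.0, 1.0, 3.0, 3.0, 3.0];
///
/// let dist = metric_lp_irreg(&offsets, &argvals, &values, 2.0);
/// assert!((dist[1] - 2.0).abs() < 1e-10); // dist(0, 1)
/// assert_eq!(dist[0], 0.0);               // diagonal
/// ```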
pub fn metric_lp_irreg(offsets: &[usize], argvals: &[f64], values: &[f64], p: f64) -> Vec<f64> {
    let n = offsets.len() - 1;
    let mut dist = vec![0.0; n * n];

    // Compute upper triangle in parallel
    let pairs: Vec<(usize, usize)> = (0..n)
        .flat_map(|i| (i + 1..n).map(move |j| (i, j)))
        .collect();

    let distances: Vec<f64> = pairs
        .par_iter()
        .map(|&(i, j)| {
            let start_i = offsets[i];
            let end_i = offsets[i + 1];
            let start_j = offsets[j];
            let end_j = offsets[j + 1];

            lp_distance_pair(
                &argvals[start_i..end_i],
                &values[start_i..end_i],
                &argvals[start_j..end_j],
                &values[start_j..end_j],
                p,
            )
        })
        .collect();

    // Fill symmetric matrix
    for (k, &(i, j)) in pairs.iter().enumerate() {
        dist[i + j * n] = distances[k];
        dist[j + i * n] = distances[k];
    }

    dist
}

// =============================================================================
// Conversion to Regular Grid
// =============================================================================

/// Convert irregular data to a regular grid via linear interpolation.
///
/// Grid points outside a curve's observation range are marked as NaN.
///
/// # Arguments
/// * `offsets` - Start indices (length n+1)
/// * `argvals` - All observation points concatenated
/// * `values` - All values concatenated
/// * `target_grid` - Regular grid to interpolate to
///
/// # Returns
/// Data matrix (n × len(target_grid)) in column-major format
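///
/// # Example
///
/// A short sketch (doctest assumes the module is exported as
/// `fdars_core::irreg_fdata`):
///
/// ```
/// use fdars_core::irreg_fdata::to_regular_grid;
///
/// // One curve observed only on [0.5, 1.0], interpolated onto [0, 0.5, 1].
/// let offsets = vec![0, 2];
/// let argvals = vec![0.5, 1.0];
/// let values = vec![1.0, 3.0];
///
/// let grid = vec![0.0, 0.5, 1.0];
/// let mat = to_regular_grid(&offsets, &argvals, &values, &grid);
///
/// assert!(mat[0].is_nan());               // t = 0.0 is outside the curve's range
/// assert_eq!(&mat[1..], &[1.0, 3.0][..]); // t = 0.5 and t = 1.0
/// ```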
pub fn to_regular_grid(
    offsets: &[usize],
    argvals: &[f64],
    values: &[f64],
    target_grid: &[f64],
) -> Vec<f64> {
    let n = offsets.len() - 1;
    let m = target_grid.len();

    (0..n)
        .into_par_iter()
        .flat_map(|i| {
            let start = offsets[i];
            let end = offsets[i + 1];
            let obs_t = &argvals[start..end];
            let obs_x = &values[start..end];

            target_grid
                .iter()
                .map(|&t| {
                    if obs_t.is_empty() || t < obs_t[0] || t > obs_t[obs_t.len() - 1] {
                        f64::NAN
                    } else {
                        linear_interp(obs_t, obs_x, t)
                    }
                })
                .collect::<Vec<f64>>()
        })
        .collect::<Vec<f64>>()
        // Transpose to column-major
        .chunks(m)
        .enumerate()
        .fold(vec![0.0; n * m], |mut acc, (i, row)| {
            for (j, &val) in row.iter().enumerate() {
                acc[i + j * n] = val;
            }
            acc
        })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_from_lists() {
        let argvals_list = vec![vec![0.0, 0.5, 1.0], vec![0.0, 1.0]];
        let values_list = vec![vec![1.0, 2.0, 3.0], vec![1.0, 3.0]];

        let ifd = IrregFdata::from_lists(&argvals_list, &values_list);

        assert_eq!(ifd.n_obs(), 2);
        assert_eq!(ifd.n_points(0), 3);
        assert_eq!(ifd.n_points(1), 2);
        assert_eq!(ifd.total_points(), 5);
    }

    #[test]
    fn test_get_obs() {
        let argvals_list = vec![vec![0.0, 0.5, 1.0], vec![0.0, 1.0]];
        let values_list = vec![vec![1.0, 2.0, 3.0], vec![1.0, 3.0]];

        let ifd = IrregFdata::from_lists(&argvals_list, &values_list);

        let (t0, x0) = ifd.get_obs(0);
        assert_eq!(t0, &[0.0, 0.5, 1.0]);
        assert_eq!(x0, &[1.0, 2.0, 3.0]);

        let (t1, x1) = ifd.get_obs(1);
        assert_eq!(t1, &[0.0, 1.0]);
        assert_eq!(x1, &[1.0, 3.0]);
    }

    #[test]
    fn test_integrate_irreg() {
        // Integrate the constant functions 1 and 2 over [0, 1]
        let offsets = vec![0, 3, 6];
        let argvals = vec![0.0, 0.5, 1.0, 0.0, 0.5, 1.0];
        let values = vec![1.0, 1.0, 1.0, 2.0, 2.0, 2.0];

        let integrals = integrate_irreg(&offsets, &argvals, &values);

        assert!((integrals[0] - 1.0).abs() < 1e-10);
        assert!((integrals[1] - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_norm_lp_irreg() {
        // L2 norm of the constant function f ≡ c on [0, 1] is c
        let offsets = vec![0, 3];
        let argvals = vec![0.0, 0.5, 1.0];
        let values = vec![2.0, 2.0, 2.0];

        let norms = norm_lp_irreg(&offsets, &argvals, &values, 2.0);

        assert!((norms[0] - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_linear_interp() {
        let t = vec![0.0, 1.0, 2.0];
        let x = vec![0.0, 2.0, 4.0];

        assert!((linear_interp(&t, &x, 0.5) - 1.0).abs() < 1e-10);
        assert!((linear_interp(&t, &x, 1.5) - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_mean_irreg() {
        // Two identical curves: the smoothed mean should track the common values
        let offsets = vec![0, 3, 6];
        let argvals = vec![0.0, 0.5, 1.0, 0.0, 0.5, 1.0];
        let values = vec![0.0, 1.0, 2.0, 0.0, 1.0, 2.0];

        let target = vec![0.0, 0.5, 1.0];
        let mean = mean_irreg(&offsets, &argvals, &values, &target, 0.5, 1);

        // Mean should be close to the common values
        assert!((mean[1] - 1.0).abs() < 0.3);
    }
}