lowess/
utils.rs

1//! Utility functions for LOWESS smoothing.
2//!
3//! This module provides lightweight, production-safe helpers used throughout
4//! the LOWESS pipeline. The functions are intentionally defensive: they
5//! validate inputs, document preconditions, and return safe values on
6//! degenerate inputs rather than panicking in release builds.
7//!
8//! Global expectations
9//! - Callers should prefer to pre-clean data (remove NaNs/infs) and sort `x`
10//!   when windowing behavior depends on monotonic order. Several helpers
11//!   assert correctness with debug-only checks but return safe outputs in
12//!   production paths.
13//!
14//! Key parameters and their semantics
15//! - x: slice of independent variable values. Many helpers assume `x` is
16//!   sorted ascending; if not, use `sort_by_x` before window-based ops.
17//! - y: slice of dependent variable values; must be aligned with `x`.
18//! - frac: smoothing fraction in (0, 1]. Used to compute window sizes.
19//!   validate_inputs will reject non-finite or out-of-range values.
20//! - window_size (usize): discrete neighborhood size computed from `frac`
21//!   and `n` via `calculate_window_size`. Always at least 2 and at most `n`.
22//! - delta: interpolation distance threshold (`Option<T>` or T). When None
23//!   `calculate_delta` returns a conservative default (~1% of x-range). A
24//!   zero or negative delta disables the skip/fast-path logic.
25//! - left/right/current/idx: integer indices describing the local window
26//!   boundaries; many window helpers return clamped values in [0, n-1].
27//!
28//! Important functions (brief)
29//! - validate_inputs(x, y, frac): checks lengths, finiteness, minimum points,
30//!   and fraction bounds. Returns a Result for early failure in callers.
31//! - validate_confidence_level(level): ensure level ∈ (0,1) for interval code.
32//! - validate_delta(delta): ensures delta ≥ 0 and finite.
33//! - sort_by_x: stable mapping of x/y to sorted order (returns new Vecs).
34//! - calculate_delta(delta, x_sorted): resolves optional delta to a numeric
35//!   value (defaults to 1% of range for None).
36//! - calculate_window_size(n, frac): converts fractional span to integer
37//!   window size with safe min/max clamping.
38//! - initialize_window / update_window: establish and slide local windows
39//!   while keeping them valid for regression computations.
40//! - interpolate_gap: linear interpolation between two fitted points; used
41//!   when delta indicates points can be filled in rather than re-fit.
42//! - skip_close_points: delta-driven fast-path to skip/refill sequences of
43//!   points close to the last fitted x. Also handles identical-x ties by
44//!   copying the last fitted value for stability.
45//! - normalize_weights / normalize_all_weights: numeric-safe normalization
46//!   with uniform fallback when total weight is (near) zero.
47//! - compute_range / find_rightmost_point: range and threshold helpers used
48//!   by windowing and delta logic.
49//! - compute_weighted_average / is_effectively_zero: small numeric utilities
50//!   used widely across fitting and diagnostics code.
51//!
52//! Production recommendations
53//! - Validate and deduplicate inputs upstream for large datasets. The helpers
54//!   here validate and handle edge cases, but upstream cleaning improves
55//!   determinism and performance.
56//! - Use `delta` (default ~1% of range) for dense inputs to accelerate fitting.
57//! - Choose `frac` in (0, 1]; cross-validation at the builder layer is
58//!   recommended for data-driven selection.
59
60#[cfg(not(feature = "std"))]
61extern crate alloc;
62#[cfg(not(feature = "std"))]
63use alloc::vec::Vec;
64
65use crate::{LowessError, Result};
66use core::cmp::Ordering;
67use num_traits::Float;
68
69// ============================================================================
70// Input Validation
71// ============================================================================
72
73/// Validate input arrays for LOWESS smoothing.
74pub fn validate_inputs<T: Float>(x: &[T], y: &[T], frac: T) -> Result<()> {
75    // Empty input is an error
76    if x.is_empty() || y.is_empty() {
77        return Err(LowessError::EmptyInput);
78    }
79
80    // Length mismatch
81    if x.len() != y.len() {
82        return Err(LowessError::MismatchedInputs {
83            x_len: x.len(),
84            y_len: y.len(),
85        });
86    }
87
88    // Require at least two points for a meaningful local regression
89    if x.len() < 2 {
90        return Err(LowessError::TooFewPoints {
91            got: x.len(),
92            min: 2,
93        });
94    }
95
96    // Reject NaN / infinite in inputs
97    for (i, &xi) in x.iter().enumerate() {
98        if !xi.is_finite() {
99            return Err(LowessError::InvalidNumericValue(format!(
100                "x[{}]={}",
101                i,
102                xi.to_f64().unwrap_or(f64::NAN)
103            )));
104        }
105    }
106    for (i, &yi) in y.iter().enumerate() {
107        if !yi.is_finite() {
108            return Err(LowessError::InvalidNumericValue(format!(
109                "y[{}]={}",
110                i,
111                yi.to_f64().unwrap_or(f64::NAN)
112            )));
113        }
114    }
115
116    // Validate fraction
117    if !frac.is_finite() || frac <= T::zero() || frac > T::one() {
118        return Err(LowessError::InvalidFraction(
119            frac.to_f64().unwrap_or(f64::NAN),
120        ));
121    }
122
123    Ok(())
124}
125
126/// Validate confidence level parameter.
127pub fn validate_confidence_level<T: Float>(level: T) -> Result<()> {
128    if !level.is_finite() || level <= T::zero() || level >= T::one() {
129        return Err(LowessError::InvalidConfidenceLevel(
130            level.to_f64().unwrap_or(f64::NAN),
131        ));
132    }
133    Ok(())
134}
135
136/// Validate delta parameter.
137pub fn validate_delta<T: Float>(delta: T) -> Result<()> {
138    if !delta.is_finite() || delta < T::zero() {
139        return Err(LowessError::InvalidDelta(
140            delta.to_f64().unwrap_or(f64::NAN),
141        ));
142    }
143    Ok(())
144}
145
146// ============================================================================
147// Data Preparation
148// ============================================================================
149
150/// Sort data by x values and return sorted (x, y) pairs.
151pub fn sort_by_x<T: Float>(x: &[T], y: &[T]) -> (Vec<T>, Vec<T>) {
152    debug_assert_eq!(x.len(), y.len(), "Length mismatch in sort_by_x");
153    let mut indices: Vec<usize> = (0..x.len()).collect();
154    indices.sort_unstable_by(|&i, &j| x[i].partial_cmp(&x[j]).unwrap_or(Ordering::Equal));
155    let x_sorted = indices.iter().map(|&i| x[i]).collect();
156    let y_sorted = indices.iter().map(|&i| y[i]).collect();
157    (x_sorted, y_sorted)
158}
159
160/// Check if x values are sorted in ascending order.
161pub fn is_sorted<T: Float>(x: &[T]) -> bool {
162    x.windows(2).all(|w| w[0] <= w[1])
163}
164
165// ============================================================================
166// Delta Calculation
167// ============================================================================
168
169/// Calculate delta parameter for interpolation optimization.
170pub fn calculate_delta<T: Float>(delta: Option<T>, x_sorted: &[T]) -> Result<T> {
171    match delta {
172        Some(d) => {
173            validate_delta(d)?;
174            Ok(d)
175        }
176        None => {
177            if x_sorted.is_empty() {
178                Ok(T::zero())
179            } else {
180                let range = x_sorted[x_sorted.len() - 1] - x_sorted[0];
181                Ok(T::from(0.01).unwrap() * range)
182            }
183        }
184    }
185}
186
187// ============================================================================
188// Window Size Calculation
189// ============================================================================
190
191/// Calculate window size from fraction and number of points.
192pub fn calculate_window_size<T: Float>(n: usize, frac: T) -> usize {
193    let frac_n = frac * T::from(n).unwrap();
194    let frac_n_int = frac_n.to_usize().unwrap_or(0);
195    usize::max(2, usize::min(n, frac_n_int))
196}
197
198// ============================================================================
199// Window Management
200// ============================================================================
201
202/// Update window boundaries to maintain optimal window around current point.
203pub fn update_window<T: Float>(
204    x: &[T],
205    current: usize,
206    left: &mut usize,
207    right: &mut usize,
208    n: usize,
209) {
210    while *right < n - 1 {
211        let d_left = x[current] - x[*left];
212        let d_right = x[*right + 1] - x[current];
213        if d_left <= d_right {
214            break;
215        }
216        *left += 1;
217        *right += 1;
218    }
219}
220
221/// Initialize window boundaries for a given point.
222pub fn initialize_window(idx: usize, window_size: usize, n: usize) -> (usize, usize) {
223    // If requested window covers (or exceeds) the whole array, return full range.
224    if window_size >= n {
225        return (0, n.saturating_sub(1));
226    }
227
228    let half = window_size / 2;
229    // Start by centering the window around idx.
230    let mut left = idx.saturating_sub(half);
231
232    // Clamp left so the window fits in [0, n-1] and has the requested size.
233    // Since window_size < n here, n - window_size is well-defined.
234    let max_left = n - window_size;
235    if left > max_left {
236        left = max_left;
237    }
238
239    let right = left + window_size.saturating_sub(1);
240
241    debug_assert_eq!(
242        right - left + 1,
243        window_size,
244        "initialize_window produced wrong size"
245    );
246    (left, right)
247}
248
249// ============================================================================
250// Interpolation
251// ============================================================================
252
253/// Linearly interpolate smoothed values between fitted points.
254pub fn interpolate_gap<T: Float>(x: &[T], y_smooth: &mut [T], last: usize, current: usize) {
255    if current <= last + 1 {
256        return;
257    }
258    let denom = x[current] - x[last];
259    if denom <= T::zero() {
260        let val = y_smooth[last];
261        for ys in y_smooth.iter_mut().take(current).skip(last + 1) {
262            *ys = val;
263        }
264        return;
265    }
266
267    let y_current = y_smooth[current];
268    let y_last = y_smooth[last];
269
270    let xs = &x[(last + 1)..current];
271    for (xj, ys) in xs.iter().zip(y_smooth.iter_mut().skip(last + 1)) {
272        let alpha = (*xj - x[last]) / denom;
273        *ys = alpha * y_current + (T::one() - alpha) * y_last;
274    }
275}
276
277// ============================================================================
278// Delta Optimization
279// ============================================================================
280
281/// Skip points that are within delta distance of the last fitted point.
282pub fn skip_close_points<T: Float>(
283    x: &[T],
284    y_smooth: &mut [T],
285    delta: T,
286    last_fitted: &mut usize,
287    n: usize,
288) -> usize {
289    let mut last = *last_fitted;
290
291    // No delta optimization -> advance one step (cap to last index).
292    if delta <= T::zero() {
293        return usize::min(last.saturating_add(1), n.saturating_sub(1));
294    }
295
296    // If at or beyond the end, return final index.
297    if last + 1 >= n {
298        return n.saturating_sub(1);
299    }
300
301    // 1) Handle contiguous identical-x ties immediately after `last`.
302    // Copy the last fitted value into tied neighbors and advance `last_fitted`
303    // to the end of that tie-block so they are treated as processed.
304    let mut tie_end = last;
305    while tie_end + 1 < n && x[tie_end + 1] == x[last] {
306        tie_end += 1;
307    }
308    if tie_end > last {
309        for i in (last + 1)..=tie_end {
310            y_smooth[i] = y_smooth[last];
311        }
312        *last_fitted = tie_end;
313        last = *last_fitted;
314
315        if last + 1 >= n {
316            return n.saturating_sub(1);
317        }
318    }
319
320    // 2) If any later point already has a fitted value, jump to it.
321    for (i, ys) in y_smooth.iter().enumerate().skip(last + 1) {
322        if *ys != T::zero() {
323            return i;
324        }
325    }
326
327    // 3) Apply delta-based skip logic from the updated `last`.
328    let cut = x[last] + delta;
329    let slice = &x[(last + 1)..n];
330    let pos = slice.partition_point(|&xj| xj <= cut);
331
332    // Next index should be the first index beyond those <= cut:
333    let mut next = last.saturating_add(pos).saturating_add(1);
334
335    // Cap to last valid index
336    if next > n.saturating_sub(1) {
337        next = n.saturating_sub(1);
338    }
339
340    // Ensure we advance at least one step and never go out of bounds.
341    let candidate = usize::max(last.saturating_add(1), next);
342    usize::min(candidate, n.saturating_sub(1))
343}
344
345// ============================================================================
346// Weight Normalization
347// ============================================================================
348
349/// Normalize weights to sum to 1 over a specified range.
350pub fn normalize_weights<T: Float>(weights: &mut [T], left: usize, right: usize, sum: T) {
351    debug_assert!(sum > T::zero(), "Sum must be positive for normalization");
352    let inv = T::one() / sum;
353    for w in weights.iter_mut().take(right + 1).skip(left) {
354        *w = *w * inv;
355    }
356}
357
358/// Normalize entire weight array to sum to 1.
359pub fn normalize_all_weights<T: Float>(weights: &mut [T]) {
360    let sum: T = weights.iter().copied().fold(T::zero(), |acc, v| acc + v);
361    if sum > T::zero() {
362        let inv = T::one() / sum;
363        for w in weights.iter_mut() {
364            *w = *w * inv;
365        }
366    } else if !weights.is_empty() {
367        let uniform = T::one() / T::from(weights.len()).unwrap();
368        weights.fill(uniform);
369    }
370}
371
372// ============================================================================
373// Range and Distance Utilities
374// ============================================================================
375
376/// Compute the range of x values.
377pub fn compute_range<T: Float>(x: &[T]) -> T {
378    if x.is_empty() {
379        T::zero()
380    } else {
381        x[x.len() - 1] - x[0]
382    }
383}
384
385/// Find index of rightmost point within a distance threshold.
386pub fn find_rightmost_point<T: Float>(
387    x: &[T],
388    x_current: T,
389    left: usize,
390    n: usize,
391    threshold: T,
392) -> usize {
393    let target = x_current + threshold;
394    x[left..n]
395        .partition_point(|&xj| xj <= target)
396        .saturating_sub(1)
397        + left
398}
399
400// ============================================================================
401// Statistical Utilities
402// ============================================================================
403
404/// Compute weighted average over a range.
405pub fn compute_weighted_average<T: Float>(
406    values: &[T],
407    weights: &[T],
408    left: usize,
409    right: usize,
410) -> T {
411    let mut sum = T::zero();
412    for j in left..=right {
413        sum = sum + weights[j] * values[j];
414    }
415    sum
416}
417
418/// Check if a value is effectively zero.
419pub fn is_effectively_zero<T: Float>(value: T, scale: T) -> bool {
420    if scale <= T::zero() {
421        return value == T::zero();
422    }
423    // Machine-epsilon based term (keeps behavior proportional to precision)
424    let multiplier = T::from(8.0).unwrap();
425    let eps_term = T::epsilon() * multiplier;
426
427    // Practical absolute floor:
428    // ~1e-9 relative to scale for f64, while f32 will use its larger eps_term.
429    let abs_floor = T::from(1e-9).unwrap();
430
431    let base = if eps_term > abs_floor {
432        eps_term
433    } else {
434        abs_floor
435    };
436    let threshold = base * scale;
437    value.abs() <= threshold
438}