lowess/utils.rs
1//! Utility functions for LOWESS smoothing.
2//!
3//! This module provides lightweight, production-safe helpers used throughout
4//! the LOWESS pipeline. The functions are intentionally defensive: they
5//! validate inputs, document preconditions, and return safe values on
6//! degenerate inputs rather than panicking in release builds.
7//!
8//! Global expectations
9//! - Callers should prefer to pre-clean data (remove NaNs/infs) and sort `x`
10//! when windowing behavior depends on monotonic order. Several helpers
11//! assert correctness with debug-only checks but return safe outputs in
12//! production paths.
13//!
14//! Key parameters and their semantics
15//! - x: slice of independent variable values. Many helpers assume `x` is
16//! sorted ascending; if not, use `sort_by_x` before window-based ops.
17//! - y: slice of dependent variable values; must be aligned with `x`.
18//! - frac: smoothing fraction in (0, 1]. Used to compute window sizes.
19//! validate_inputs will reject non-finite or out-of-range values.
20//! - window_size (usize): discrete neighborhood size computed from `frac`
21//! and `n` via `calculate_window_size`. Always at least 2 and at most `n`.
22//! - delta: interpolation distance threshold (`Option<T>` or T). When None
23//! `calculate_delta` returns a conservative default (~1% of x-range). A
24//! zero or negative delta disables the skip/fast-path logic.
25//! - left/right/current/idx: integer indices describing the local window
26//! boundaries; many window helpers return clamped values in [0, n-1].
27//!
28//! Important functions (brief)
29//! - validate_inputs(x, y, frac): checks lengths, finiteness, minimum points,
30//! and fraction bounds. Returns a Result for early failure in callers.
31//! - validate_confidence_level(level): ensure level ∈ (0,1) for interval code.
32//! - validate_delta(delta): ensures delta ≥ 0 and finite.
33//! - sort_by_x: stable mapping of x/y to sorted order (returns new Vecs).
34//! - calculate_delta(delta, x_sorted): resolves optional delta to a numeric
35//! value (defaults to 1% of range for None).
36//! - calculate_window_size(n, frac): converts fractional span to integer
37//! window size with safe min/max clamping.
38//! - initialize_window / update_window: establish and slide local windows
39//! while keeping them valid for regression computations.
40//! - interpolate_gap: linear interpolation between two fitted points; used
41//! when delta indicates points can be filled in rather than re-fit.
42//! - skip_close_points: delta-driven fast-path to skip/refill sequences of
43//! points close to the last fitted x. Also handles identical-x ties by
44//! copying the last fitted value for stability.
45//! - normalize_weights / normalize_all_weights: numeric-safe normalization
46//! with uniform fallback when total weight is (near) zero.
47//! - compute_range / find_rightmost_point: range and threshold helpers used
48//! by windowing and delta logic.
49//! - compute_weighted_average / is_effectively_zero: small numeric utilities
50//! used widely across fitting and diagnostics code.
51//!
52//! Production recommendations
53//! - Validate and deduplicate inputs upstream for large datasets. The helpers
54//! here validate and handle edge cases, but upstream cleaning improves
55//! determinism and performance.
56//! - Use `delta` (default ~1% of range) for dense inputs to accelerate fitting.
57//! - Choose `frac` in (0, 1]; cross-validation at the builder layer is
58//! recommended for data-driven selection.
59
60#[cfg(not(feature = "std"))]
61extern crate alloc;
62#[cfg(not(feature = "std"))]
63use alloc::vec::Vec;
64
65use crate::{LowessError, Result};
66use core::cmp::Ordering;
67use num_traits::Float;
68
69// ============================================================================
70// Input Validation
71// ============================================================================
72
73/// Validate input arrays for LOWESS smoothing.
74pub fn validate_inputs<T: Float>(x: &[T], y: &[T], frac: T) -> Result<()> {
75 // Empty input is an error
76 if x.is_empty() || y.is_empty() {
77 return Err(LowessError::EmptyInput);
78 }
79
80 // Length mismatch
81 if x.len() != y.len() {
82 return Err(LowessError::MismatchedInputs {
83 x_len: x.len(),
84 y_len: y.len(),
85 });
86 }
87
88 // Require at least two points for a meaningful local regression
89 if x.len() < 2 {
90 return Err(LowessError::TooFewPoints {
91 got: x.len(),
92 min: 2,
93 });
94 }
95
96 // Reject NaN / infinite in inputs
97 for (i, &xi) in x.iter().enumerate() {
98 if !xi.is_finite() {
99 return Err(LowessError::InvalidNumericValue(format!(
100 "x[{}]={}",
101 i,
102 xi.to_f64().unwrap_or(f64::NAN)
103 )));
104 }
105 }
106 for (i, &yi) in y.iter().enumerate() {
107 if !yi.is_finite() {
108 return Err(LowessError::InvalidNumericValue(format!(
109 "y[{}]={}",
110 i,
111 yi.to_f64().unwrap_or(f64::NAN)
112 )));
113 }
114 }
115
116 // Validate fraction
117 if !frac.is_finite() || frac <= T::zero() || frac > T::one() {
118 return Err(LowessError::InvalidFraction(
119 frac.to_f64().unwrap_or(f64::NAN),
120 ));
121 }
122
123 Ok(())
124}
125
126/// Validate confidence level parameter.
127pub fn validate_confidence_level<T: Float>(level: T) -> Result<()> {
128 if !level.is_finite() || level <= T::zero() || level >= T::one() {
129 return Err(LowessError::InvalidConfidenceLevel(
130 level.to_f64().unwrap_or(f64::NAN),
131 ));
132 }
133 Ok(())
134}
135
136/// Validate delta parameter.
137pub fn validate_delta<T: Float>(delta: T) -> Result<()> {
138 if !delta.is_finite() || delta < T::zero() {
139 return Err(LowessError::InvalidDelta(
140 delta.to_f64().unwrap_or(f64::NAN),
141 ));
142 }
143 Ok(())
144}
145
146// ============================================================================
147// Data Preparation
148// ============================================================================
149
150/// Sort data by x values and return sorted (x, y) pairs.
151pub fn sort_by_x<T: Float>(x: &[T], y: &[T]) -> (Vec<T>, Vec<T>) {
152 debug_assert_eq!(x.len(), y.len(), "Length mismatch in sort_by_x");
153 let mut indices: Vec<usize> = (0..x.len()).collect();
154 indices.sort_unstable_by(|&i, &j| x[i].partial_cmp(&x[j]).unwrap_or(Ordering::Equal));
155 let x_sorted = indices.iter().map(|&i| x[i]).collect();
156 let y_sorted = indices.iter().map(|&i| y[i]).collect();
157 (x_sorted, y_sorted)
158}
159
160/// Check if x values are sorted in ascending order.
161pub fn is_sorted<T: Float>(x: &[T]) -> bool {
162 x.windows(2).all(|w| w[0] <= w[1])
163}
164
165// ============================================================================
166// Delta Calculation
167// ============================================================================
168
169/// Calculate delta parameter for interpolation optimization.
170pub fn calculate_delta<T: Float>(delta: Option<T>, x_sorted: &[T]) -> Result<T> {
171 match delta {
172 Some(d) => {
173 validate_delta(d)?;
174 Ok(d)
175 }
176 None => {
177 if x_sorted.is_empty() {
178 Ok(T::zero())
179 } else {
180 let range = x_sorted[x_sorted.len() - 1] - x_sorted[0];
181 Ok(T::from(0.01).unwrap() * range)
182 }
183 }
184 }
185}
186
187// ============================================================================
188// Window Size Calculation
189// ============================================================================
190
191/// Calculate window size from fraction and number of points.
192pub fn calculate_window_size<T: Float>(n: usize, frac: T) -> usize {
193 let frac_n = frac * T::from(n).unwrap();
194 let frac_n_int = frac_n.to_usize().unwrap_or(0);
195 usize::max(2, usize::min(n, frac_n_int))
196}
197
198// ============================================================================
199// Window Management
200// ============================================================================
201
202/// Update window boundaries to maintain optimal window around current point.
203pub fn update_window<T: Float>(
204 x: &[T],
205 current: usize,
206 left: &mut usize,
207 right: &mut usize,
208 n: usize,
209) {
210 while *right < n - 1 {
211 let d_left = x[current] - x[*left];
212 let d_right = x[*right + 1] - x[current];
213 if d_left <= d_right {
214 break;
215 }
216 *left += 1;
217 *right += 1;
218 }
219}
220
221/// Initialize window boundaries for a given point.
222pub fn initialize_window(idx: usize, window_size: usize, n: usize) -> (usize, usize) {
223 // If requested window covers (or exceeds) the whole array, return full range.
224 if window_size >= n {
225 return (0, n.saturating_sub(1));
226 }
227
228 let half = window_size / 2;
229 // Start by centering the window around idx.
230 let mut left = idx.saturating_sub(half);
231
232 // Clamp left so the window fits in [0, n-1] and has the requested size.
233 // Since window_size < n here, n - window_size is well-defined.
234 let max_left = n - window_size;
235 if left > max_left {
236 left = max_left;
237 }
238
239 let right = left + window_size.saturating_sub(1);
240
241 debug_assert_eq!(
242 right - left + 1,
243 window_size,
244 "initialize_window produced wrong size"
245 );
246 (left, right)
247}
248
249// ============================================================================
250// Interpolation
251// ============================================================================
252
253/// Linearly interpolate smoothed values between fitted points.
254pub fn interpolate_gap<T: Float>(x: &[T], y_smooth: &mut [T], last: usize, current: usize) {
255 if current <= last + 1 {
256 return;
257 }
258 let denom = x[current] - x[last];
259 if denom <= T::zero() {
260 let val = y_smooth[last];
261 for ys in y_smooth.iter_mut().take(current).skip(last + 1) {
262 *ys = val;
263 }
264 return;
265 }
266
267 let y_current = y_smooth[current];
268 let y_last = y_smooth[last];
269
270 let xs = &x[(last + 1)..current];
271 for (xj, ys) in xs.iter().zip(y_smooth.iter_mut().skip(last + 1)) {
272 let alpha = (*xj - x[last]) / denom;
273 *ys = alpha * y_current + (T::one() - alpha) * y_last;
274 }
275}
276
277// ============================================================================
278// Delta Optimization
279// ============================================================================
280
281/// Skip points that are within delta distance of the last fitted point.
282pub fn skip_close_points<T: Float>(
283 x: &[T],
284 y_smooth: &mut [T],
285 delta: T,
286 last_fitted: &mut usize,
287 n: usize,
288) -> usize {
289 let mut last = *last_fitted;
290
291 // No delta optimization -> advance one step (cap to last index).
292 if delta <= T::zero() {
293 return usize::min(last.saturating_add(1), n.saturating_sub(1));
294 }
295
296 // If at or beyond the end, return final index.
297 if last + 1 >= n {
298 return n.saturating_sub(1);
299 }
300
301 // 1) Handle contiguous identical-x ties immediately after `last`.
302 // Copy the last fitted value into tied neighbors and advance `last_fitted`
303 // to the end of that tie-block so they are treated as processed.
304 let mut tie_end = last;
305 while tie_end + 1 < n && x[tie_end + 1] == x[last] {
306 tie_end += 1;
307 }
308 if tie_end > last {
309 for i in (last + 1)..=tie_end {
310 y_smooth[i] = y_smooth[last];
311 }
312 *last_fitted = tie_end;
313 last = *last_fitted;
314
315 if last + 1 >= n {
316 return n.saturating_sub(1);
317 }
318 }
319
320 // 2) If any later point already has a fitted value, jump to it.
321 for (i, ys) in y_smooth.iter().enumerate().skip(last + 1) {
322 if *ys != T::zero() {
323 return i;
324 }
325 }
326
327 // 3) Apply delta-based skip logic from the updated `last`.
328 let cut = x[last] + delta;
329 let slice = &x[(last + 1)..n];
330 let pos = slice.partition_point(|&xj| xj <= cut);
331
332 // Next index should be the first index beyond those <= cut:
333 let mut next = last.saturating_add(pos).saturating_add(1);
334
335 // Cap to last valid index
336 if next > n.saturating_sub(1) {
337 next = n.saturating_sub(1);
338 }
339
340 // Ensure we advance at least one step and never go out of bounds.
341 let candidate = usize::max(last.saturating_add(1), next);
342 usize::min(candidate, n.saturating_sub(1))
343}
344
345// ============================================================================
346// Weight Normalization
347// ============================================================================
348
349/// Normalize weights to sum to 1 over a specified range.
350pub fn normalize_weights<T: Float>(weights: &mut [T], left: usize, right: usize, sum: T) {
351 debug_assert!(sum > T::zero(), "Sum must be positive for normalization");
352 let inv = T::one() / sum;
353 for w in weights.iter_mut().take(right + 1).skip(left) {
354 *w = *w * inv;
355 }
356}
357
358/// Normalize entire weight array to sum to 1.
359pub fn normalize_all_weights<T: Float>(weights: &mut [T]) {
360 let sum: T = weights.iter().copied().fold(T::zero(), |acc, v| acc + v);
361 if sum > T::zero() {
362 let inv = T::one() / sum;
363 for w in weights.iter_mut() {
364 *w = *w * inv;
365 }
366 } else if !weights.is_empty() {
367 let uniform = T::one() / T::from(weights.len()).unwrap();
368 weights.fill(uniform);
369 }
370}
371
372// ============================================================================
373// Range and Distance Utilities
374// ============================================================================
375
376/// Compute the range of x values.
377pub fn compute_range<T: Float>(x: &[T]) -> T {
378 if x.is_empty() {
379 T::zero()
380 } else {
381 x[x.len() - 1] - x[0]
382 }
383}
384
385/// Find index of rightmost point within a distance threshold.
386pub fn find_rightmost_point<T: Float>(
387 x: &[T],
388 x_current: T,
389 left: usize,
390 n: usize,
391 threshold: T,
392) -> usize {
393 let target = x_current + threshold;
394 x[left..n]
395 .partition_point(|&xj| xj <= target)
396 .saturating_sub(1)
397 + left
398}
399
400// ============================================================================
401// Statistical Utilities
402// ============================================================================
403
404/// Compute weighted average over a range.
405pub fn compute_weighted_average<T: Float>(
406 values: &[T],
407 weights: &[T],
408 left: usize,
409 right: usize,
410) -> T {
411 let mut sum = T::zero();
412 for j in left..=right {
413 sum = sum + weights[j] * values[j];
414 }
415 sum
416}
417
418/// Check if a value is effectively zero.
419pub fn is_effectively_zero<T: Float>(value: T, scale: T) -> bool {
420 if scale <= T::zero() {
421 return value == T::zero();
422 }
423 // Machine-epsilon based term (keeps behavior proportional to precision)
424 let multiplier = T::from(8.0).unwrap();
425 let eps_term = T::epsilon() * multiplier;
426
427 // Practical absolute floor:
428 // ~1e-9 relative to scale for f64, while f32 will use its larger eps_term.
429 let abs_floor = T::from(1e-9).unwrap();
430
431 let base = if eps_term > abs_floor {
432 eps_term
433 } else {
434 abs_floor
435 };
436 let threshold = base * scale;
437 value.abs() <= threshold
438}