1use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
25use std::collections::HashMap;
26use thiserror::Error;
27
/// Errors produced by the statistics and vector/matrix helpers in this module.
#[derive(Error, Debug)]
pub enum MathError {
    /// A caller-supplied argument was out of range or otherwise unusable
    /// (e.g. a quantile outside [0, 1], a non-positive norm order).
    #[error("Invalid input: {0}")]
    InvalidInput(String),

    /// A computation could not be carried out numerically
    /// (e.g. non-contiguous array storage, normalizing a zero vector).
    #[error("Numerical error: {0}")]
    NumericalError(String),

    /// Two inputs that must have matching lengths did not.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: String, actual: String },

    /// An operation that needs at least one element received none.
    #[error("Empty input data")]
    EmptyInput,

    /// A denominator (e.g. the sum of weights) was exactly zero.
    #[error("Division by zero")]
    DivisionByZero,
}
54
55pub type MathResult<T> = std::result::Result<T, MathError>;
59
/// Descriptive statistics over a one-dimensional collection of `f64` samples.
pub trait Statistics {
    /// Arithmetic mean; `EmptyInput` when there is no data.
    fn mean(&self) -> MathResult<f64>;

    /// Sample variance (n-1 denominator); needs at least 2 values.
    fn variance(&self) -> MathResult<f64>;

    /// Square root of the sample variance.
    fn std_dev(&self) -> MathResult<f64>;

    /// Smallest element; `EmptyInput` when there is no data.
    fn min(&self) -> MathResult<f64>;

    /// Largest element; `EmptyInput` when there is no data.
    fn max(&self) -> MathResult<f64>;

    /// Quantile `q` in [0, 1], linearly interpolated between order statistics.
    fn quantile(&self, q: f64) -> MathResult<f64>;

    /// Skewness: mean of cubed standardized deviations (divides by n).
    fn skewness(&self) -> MathResult<f64>;

    /// Excess kurtosis: mean of fourth-power standardized deviations minus 3.
    fn kurtosis(&self) -> MathResult<f64>;
}
97
/// Element-wise arithmetic and norm operations on `f64` vectors.
pub trait VectorMath {
    /// Dot product; `DimensionMismatch` on unequal lengths.
    fn dot(&self, other: &[f64]) -> MathResult<f64>;

    /// p-norm; `p` must be strictly positive (p = 1 and p = 2 take fast paths).
    fn norm(&self, p: f64) -> MathResult<f64>;

    /// Scales the vector to unit L2 norm; errors for a zero vector.
    fn normalize(&self) -> MathResult<Vec<f64>>;

    /// Shifts and scales to zero mean / unit standard deviation;
    /// errors for a constant vector.
    fn standardize(&self) -> MathResult<Vec<f64>>;

    /// Clamps every element into [min, max].
    fn clip(&self, min: f64, max: f64) -> Vec<f64>;

    /// Element-wise sum; `DimensionMismatch` on unequal lengths.
    fn add(&self, other: &[f64]) -> MathResult<Vec<f64>>;

    /// Element-wise difference; `DimensionMismatch` on unequal lengths.
    fn subtract(&self, other: &[f64]) -> MathResult<Vec<f64>>;

    /// Multiplies every element by `scalar`.
    fn multiply(&self, scalar: f64) -> Vec<f64>;
}
129
/// Column/row statistics for 2-D `f64` matrices
/// (rows are treated as samples, columns as features).
pub trait MatrixMath {
    /// Means along an axis: 0 gives per-column means, 1 gives per-row means.
    fn mean_along_axis(&self, axis: usize) -> MathResult<Array1<f64>>;

    /// Sample variances (n-1 denominator) along an axis, same axis convention.
    fn variance_along_axis(&self, axis: usize) -> MathResult<Array1<f64>>;

    /// Covariance matrix of the columns; needs at least 2 rows.
    fn covariance_matrix(&self) -> MathResult<Array2<f64>>;

    /// Pearson correlation matrix; entries involving a zero-variance
    /// column are set to 0.
    fn correlation_matrix(&self) -> MathResult<Array2<f64>>;
}
149
150impl Statistics for Vec<f64> {
152 fn mean(&self) -> MathResult<f64> {
153 compute_mean(self)
154 }
155
156 fn variance(&self) -> MathResult<f64> {
157 compute_variance(self)
158 }
159
160 fn std_dev(&self) -> MathResult<f64> {
161 compute_std_dev(self)
162 }
163
164 fn min(&self) -> MathResult<f64> {
165 if self.is_empty() {
166 return Err(MathError::EmptyInput);
167 }
168 Ok(self.iter().fold(f64::INFINITY, |a, &b| a.min(b)))
169 }
170
171 fn max(&self) -> MathResult<f64> {
172 if self.is_empty() {
173 return Err(MathError::EmptyInput);
174 }
175 Ok(self.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)))
176 }
177
178 fn quantile(&self, q: f64) -> MathResult<f64> {
179 if !(0.0..=1.0).contains(&q) {
180 return Err(MathError::InvalidInput(
181 format!("Quantile must be between 0 and 1, got {}", q)
182 ));
183 }
184
185 if self.is_empty() {
186 return Err(MathError::EmptyInput);
187 }
188
189 let mut sorted = self.clone();
190 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
191
192 let index = q * (sorted.len() - 1) as f64;
193 let lower_index = index.floor() as usize;
194 let upper_index = index.ceil() as usize;
195
196 if lower_index == upper_index {
197 Ok(sorted[lower_index])
198 } else {
199 let weight = index - lower_index as f64;
200 Ok(sorted[lower_index] * (1.0 - weight) + sorted[upper_index] * weight)
201 }
202 }
203
204 fn skewness(&self) -> MathResult<f64> {
205 let mean = self.mean()?;
206 let std_dev = self.std_dev()?;
207
208 if std_dev == 0.0 {
209 return Ok(0.0);
210 }
211
212 let n = self.len() as f64;
213 let sum_cubed_deviations: f64 = self.iter()
214 .map(|&x| ((x - mean) / std_dev).powi(3))
215 .sum();
216
217 Ok(sum_cubed_deviations / n)
218 }
219
220 fn kurtosis(&self) -> MathResult<f64> {
221 let mean = self.mean()?;
222 let std_dev = self.std_dev()?;
223
224 if std_dev == 0.0 {
225 return Ok(0.0);
226 }
227
228 let n = self.len() as f64;
229 let sum_fourth_deviations: f64 = self.iter()
230 .map(|&x| ((x - mean) / std_dev).powi(4))
231 .sum();
232
233 Ok(sum_fourth_deviations / n - 3.0) }
235}
236
237impl Statistics for Array1<f64> {
239 fn mean(&self) -> MathResult<f64> {
240 compute_mean(self.as_slice()
241 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
242 }
243
244 fn variance(&self) -> MathResult<f64> {
245 compute_variance(self.as_slice()
246 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
247 }
248
249 fn std_dev(&self) -> MathResult<f64> {
250 compute_std_dev(self.as_slice()
251 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
252 }
253
254 fn min(&self) -> MathResult<f64> {
255 if self.is_empty() {
256 return Err(MathError::EmptyInput);
257 }
258 Ok(self.iter().fold(f64::INFINITY, |a, &b| a.min(b)))
259 }
260
261 fn max(&self) -> MathResult<f64> {
262 if self.is_empty() {
263 return Err(MathError::EmptyInput);
264 }
265 Ok(self.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)))
266 }
267
268 fn quantile(&self, q: f64) -> MathResult<f64> {
269 compute_quantile(&self.to_vec(), q)
270 }
271
272 fn skewness(&self) -> MathResult<f64> {
273 compute_skewness(self.as_slice()
274 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
275 }
276
277 fn kurtosis(&self) -> MathResult<f64> {
278 compute_kurtosis(self.as_slice()
279 .ok_or_else(|| MathError::NumericalError("Non-contiguous array data".to_string()))?)
280 }
281}
282
283impl Statistics for ArrayView1<'_, f64> {
285 fn mean(&self) -> MathResult<f64> {
286 compute_mean(self.as_slice()
287 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
288 }
289
290 fn variance(&self) -> MathResult<f64> {
291 compute_variance(self.as_slice()
292 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
293 }
294
295 fn std_dev(&self) -> MathResult<f64> {
296 compute_std_dev(self.as_slice()
297 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
298 }
299
300 fn min(&self) -> MathResult<f64> {
301 if self.is_empty() {
302 return Err(MathError::EmptyInput);
303 }
304 Ok(self.iter().fold(f64::INFINITY, |a, &b| a.min(b)))
305 }
306
307 fn max(&self) -> MathResult<f64> {
308 if self.is_empty() {
309 return Err(MathError::EmptyInput);
310 }
311 Ok(self.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)))
312 }
313
314 fn quantile(&self, q: f64) -> MathResult<f64> {
315 compute_quantile(&self.to_vec(), q)
316 }
317
318 fn skewness(&self) -> MathResult<f64> {
319 compute_skewness(self.as_slice()
320 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
321 }
322
323 fn kurtosis(&self) -> MathResult<f64> {
324 compute_kurtosis(self.as_slice()
325 .ok_or_else(|| MathError::NumericalError("Non-contiguous array view".to_string()))?)
326 }
327}
328
329fn compute_skewness(values: &[f64]) -> MathResult<f64> {
331 let mean = compute_mean(values)?;
332 let std_dev = compute_std_dev(values)?;
333
334 if std_dev == 0.0 {
335 return Ok(0.0);
336 }
337
338 let n = values.len() as f64;
339 let sum_cubed_deviations: f64 = values.iter()
340 .map(|&x| ((x - mean) / std_dev).powi(3))
341 .sum();
342
343 Ok(sum_cubed_deviations / n)
344}
345
346fn compute_kurtosis(values: &[f64]) -> MathResult<f64> {
347 let mean = compute_mean(values)?;
348 let std_dev = compute_std_dev(values)?;
349
350 if std_dev == 0.0 {
351 return Ok(0.0);
352 }
353
354 let n = values.len() as f64;
355 let sum_fourth_deviations: f64 = values.iter()
356 .map(|&x| ((x - mean) / std_dev).powi(4))
357 .sum();
358
359 Ok(sum_fourth_deviations / n - 3.0)
360}
361
362fn compute_quantile(values: &[f64], q: f64) -> MathResult<f64> {
364 if !(0.0..=1.0).contains(&q) {
365 return Err(MathError::InvalidInput(
366 format!("Quantile must be between 0 and 1, got {}", q)
367 ));
368 }
369
370 if values.is_empty() {
371 return Err(MathError::EmptyInput);
372 }
373
374 let mut sorted = values.to_vec();
375 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
376
377 let index = q * (sorted.len() - 1) as f64;
378 let lower_index = index.floor() as usize;
379 let upper_index = index.ceil() as usize;
380
381 if lower_index == upper_index {
382 Ok(sorted[lower_index])
383 } else {
384 let weight = index - lower_index as f64;
385 Ok(sorted[lower_index] * (1.0 - weight) + sorted[upper_index] * weight)
386 }
387}
388
389impl VectorMath for Vec<f64> {
391 fn dot(&self, other: &[f64]) -> MathResult<f64> {
392 if self.len() != other.len() {
393 return Err(MathError::DimensionMismatch {
394 expected: self.len().to_string(),
395 actual: other.len().to_string(),
396 });
397 }
398
399 Ok(self.iter()
400 .zip(other.iter())
401 .map(|(&a, &b)| a * b)
402 .sum())
403 }
404
405 fn norm(&self, p: f64) -> MathResult<f64> {
406 if p <= 0.0 {
407 return Err(MathError::InvalidInput(
408 format!("p must be positive, got {}", p)
409 ));
410 }
411
412 if p == 1.0 {
413 Ok(l1_norm(self))
414 } else if p == 2.0 {
415 Ok(l2_norm(self))
416 } else {
417 let sum: f64 = self.iter()
418 .map(|&x| x.abs().powf(p))
419 .sum();
420 Ok(sum.powf(1.0 / p))
421 }
422 }
423
424 fn normalize(&self) -> MathResult<Vec<f64>> {
425 normalize_vector(self)
426 }
427
428 fn standardize(&self) -> MathResult<Vec<f64>> {
429 standardize_vector(self)
430 }
431
432 fn clip(&self, min: f64, max: f64) -> Vec<f64> {
433 clip_values(self, min, max)
434 }
435
436 fn add(&self, other: &[f64]) -> MathResult<Vec<f64>> {
437 if self.len() != other.len() {
438 return Err(MathError::DimensionMismatch {
439 expected: self.len().to_string(),
440 actual: other.len().to_string(),
441 });
442 }
443
444 Ok(self.iter()
445 .zip(other.iter())
446 .map(|(&a, &b)| a + b)
447 .collect())
448 }
449
450 fn subtract(&self, other: &[f64]) -> MathResult<Vec<f64>> {
451 if self.len() != other.len() {
452 return Err(MathError::DimensionMismatch {
453 expected: self.len().to_string(),
454 actual: other.len().to_string(),
455 });
456 }
457
458 Ok(self.iter()
459 .zip(other.iter())
460 .map(|(&a, &b)| a - b)
461 .collect())
462 }
463
464 fn multiply(&self, scalar: f64) -> Vec<f64> {
465 self.iter().map(|&x| x * scalar).collect()
466 }
467}
468
469impl MatrixMath for Array2<f64> {
471 fn mean_along_axis(&self, axis: usize) -> MathResult<Array1<f64>> {
472 if axis > 1 {
473 return Err(MathError::InvalidInput(
474 format!("Axis must be 0 or 1, got {}", axis)
475 ));
476 }
477
478 match axis {
479 0 => {
480 let n_rows = self.nrows() as f64;
482 Ok(self.mean_axis(ndarray::Axis(0)).unwrap() / n_rows)
483 }
484 1 => {
485 let n_cols = self.ncols() as f64;
487 Ok(self.mean_axis(ndarray::Axis(1)).unwrap() / n_cols)
488 }
489 _ => unreachable!(),
490 }
491 }
492
493 fn variance_along_axis(&self, axis: usize) -> MathResult<Array1<f64>> {
494 if axis > 1 {
495 return Err(MathError::InvalidInput(
496 format!("Axis must be 0 or 1, got {}", axis)
497 ));
498 }
499
500 let means = self.mean_along_axis(axis)?;
501 let n = if axis == 0 { self.nrows() } else { self.ncols() } as f64;
502
503 match axis {
504 0 => {
505 let variances: Vec<f64> = (0..self.ncols())
507 .map(|col| {
508 let col_data = self.column(col);
509 let mean = means[col];
510 col_data.iter()
511 .map(|&x| (x - mean).powi(2))
512 .sum::<f64>() / (n - 1.0)
513 })
514 .collect();
515 Ok(Array1::from(variances))
516 }
517 1 => {
518 let variances: Vec<f64> = (0..self.nrows())
520 .map(|row| {
521 let row_data = self.row(row);
522 let mean = means[row];
523 row_data.iter()
524 .map(|&x| (x - mean).powi(2))
525 .sum::<f64>() / (n - 1.0)
526 })
527 .collect();
528 Ok(Array1::from(variances))
529 }
530 _ => unreachable!(),
531 }
532 }
533
534 fn covariance_matrix(&self) -> MathResult<Array2<f64>> {
535 let n_samples = self.nrows() as f64;
536 if n_samples <= 1.0 {
537 return Err(MathError::InvalidInput(
538 "Need at least 2 samples for covariance".to_string()
539 ));
540 }
541
542 let means = self.mean_along_axis(0)?;
543 let mut covariance = Array2::zeros((self.ncols(), self.ncols()));
544
545 for i in 0..self.ncols() {
546 for j in 0..self.ncols() {
547 let cov: f64 = (0..self.nrows())
548 .map(|k| (self[[k, i]] - means[i]) * (self[[k, j]] - means[j]))
549 .sum::<f64>() / (n_samples - 1.0);
550 covariance[[i, j]] = cov;
551 }
552 }
553
554 Ok(covariance)
555 }
556
557 fn correlation_matrix(&self) -> MathResult<Array2<f64>> {
558 let covariance = self.covariance_matrix()?;
559 let std_devs = self.variance_along_axis(0)?.mapv(|v| v.sqrt());
560
561 let mut correlation = Array2::zeros(covariance.raw_dim());
562
563 for i in 0..covariance.nrows() {
564 for j in 0..covariance.ncols() {
565 if std_devs[i] == 0.0 || std_devs[j] == 0.0 {
566 correlation[[i, j]] = 0.0;
567 } else {
568 correlation[[i, j]] = covariance[[i, j]] / (std_devs[i] * std_devs[j]);
569 }
570 }
571 }
572
573 Ok(correlation)
574 }
575}
576
577pub fn compute_mean(values: &[f64]) -> MathResult<f64> {
593 if values.is_empty() {
594 return Err(MathError::EmptyInput);
595 }
596
597 let sum: f64 = values.iter().sum();
598 Ok(sum / values.len() as f64)
599}
600
601pub fn compute_variance(values: &[f64]) -> MathResult<f64> {
617 if values.len() < 2 {
618 return Err(MathError::InvalidInput(
619 "Need at least 2 values for variance".to_string()
620 ));
621 }
622
623 let mean = compute_mean(values)?;
624 let sum_sq_diff: f64 = values.iter()
625 .map(|&x| (x - mean).powi(2))
626 .sum();
627
628 Ok(sum_sq_diff / (values.len() - 1) as f64)
629}
630
631pub fn compute_std_dev(values: &[f64]) -> MathResult<f64> {
643 let variance = compute_variance(values)?;
644 Ok(variance.sqrt())
645}
646
647pub fn softmax(values: &[f64]) -> MathResult<Vec<f64>> {
663 if values.is_empty() {
664 return Err(MathError::EmptyInput);
665 }
666
667 let max_val = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
669 let exps: Vec<f64> = values.iter()
670 .map(|&x| (x - max_val).exp())
671 .collect();
672
673 let sum_exps: f64 = exps.iter().sum();
674
675 Ok(exps.iter().map(|&x| x / sum_exps).collect())
676}
677
/// Logistic sigmoid: 1 / (1 + e^(-x)), mapping all reals into (0, 1).
pub fn sigmoid(x: f64) -> f64 {
    let denom = 1.0 + (-x).exp();
    denom.recip()
}
692
693pub fn logit(p: f64) -> MathResult<f64> {
707 if p <= 0.0 || p >= 1.0 {
708 return Err(MathError::InvalidInput(
709 format!("Probability must be in (0, 1), got {}", p)
710 ));
711 }
712
713 Ok((p / (1.0 - p)).ln())
714}
715
/// L1 (Manhattan) norm: the sum of absolute values. Returns 0 for an
/// empty slice.
pub fn l1_norm(values: &[f64]) -> f64 {
    values.iter().fold(0.0, |acc, &x| acc + x.abs())
}
730
/// L2 (Euclidean) norm: the square root of the sum of squares. Returns 0
/// for an empty slice.
pub fn l2_norm(values: &[f64]) -> f64 {
    let sum_of_squares = values.iter().fold(0.0, |acc, &x| acc + x * x);
    sum_of_squares.sqrt()
}
745
746pub fn euclidean_distance(a: &[f64], b: &[f64]) -> MathResult<f64> {
761 if a.len() != b.len() {
762 return Err(MathError::DimensionMismatch {
763 expected: a.len().to_string(),
764 actual: b.len().to_string(),
765 });
766 }
767
768 let sum_sq: f64 = a.iter()
769 .zip(b.iter())
770 .map(|(&x, &y)| (x - y).powi(2))
771 .sum();
772
773 Ok(sum_sq.sqrt())
774}
775
/// Clamps every element of `values` into [min, max].
///
/// Kept as the `max(min).min(max)` chain (not `f64::clamp`) to preserve the
/// original behavior when `min > max` and for NaN elements.
pub fn clip_values(values: &[f64], min: f64, max: f64) -> Vec<f64> {
    let mut clipped = Vec::with_capacity(values.len());
    for &x in values {
        clipped.push(x.max(min).min(max));
    }
    clipped
}
794
795pub fn normalize_vector(values: &[f64]) -> MathResult<Vec<f64>> {
809 let norm = l2_norm(values);
810 if norm == 0.0 {
811 return Err(MathError::NumericalError("Cannot normalize zero vector".to_string()));
812 }
813
814 Ok(values.iter().map(|&x| x / norm).collect())
815}
816
817pub fn standardize_vector(values: &[f64]) -> MathResult<Vec<f64>> {
831 let mean = compute_mean(values)?;
832 let std_dev = compute_std_dev(values)?;
833
834 if std_dev == 0.0 {
835 return Err(MathError::NumericalError("Cannot standardize constant vector".to_string()));
836 }
837
838 Ok(values.iter().map(|&x| (x - mean) / std_dev).collect())
839}
840
841pub fn weighted_mean(values: &[f64], weights: &[f64]) -> MathResult<f64> {
857 if values.len() != weights.len() {
858 return Err(MathError::DimensionMismatch {
859 expected: values.len().to_string(),
860 actual: weights.len().to_string(),
861 });
862 }
863
864 if values.is_empty() {
865 return Err(MathError::EmptyInput);
866 }
867
868 let weighted_sum: f64 = values.iter()
869 .zip(weights.iter())
870 .map(|(&x, &w)| x * w)
871 .sum();
872
873 let weight_sum: f64 = weights.iter().sum();
874
875 if weight_sum == 0.0 {
876 return Err(MathError::DivisionByZero);
877 }
878
879 Ok(weighted_sum / weight_sum)
880}
881
882pub fn rmse(predictions: &[f64], targets: &[f64]) -> MathResult<f64> {
898 if predictions.len() != targets.len() {
899 return Err(MathError::DimensionMismatch {
900 expected: predictions.len().to_string(),
901 actual: targets.len().to_string(),
902 });
903 }
904
905 if predictions.is_empty() {
906 return Err(MathError::EmptyInput);
907 }
908
909 let mse: f64 = predictions.iter()
910 .zip(targets.iter())
911 .map(|(&pred, &target)| (pred - target).powi(2))
912 .sum::<f64>() / predictions.len() as f64;
913
914 Ok(mse.sqrt())
915}
916