1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/// Generate test dataset for statistical validation
///
/// Produces a fully deterministic dataset (no RNG involved) with known
/// statistical properties for testing floating-point equivalence across
/// backends. Element `i` is `i * 0.001`, with alternating sign: even
/// indices are non-negative, odd indices are negated.
///
/// # Arguments
///
/// * `size` - Number of elements to generate
///
/// # Returns
///
/// Vector of f64 values mixing positive/negative, small/large magnitudes
/// while staying well inside f64's safe range (no overflow/underflow).
///
/// # Example
///
/// ```rust
/// use pmat::services::analytics_backend::stats::generate_test_dataset;
///
/// let dataset = generate_test_dataset(100_000);
/// assert_eq!(dataset.len(), 100_000);
/// ```
pub fn generate_test_dataset(size: usize) -> Vec<f64> {
    let mut data = Vec::with_capacity(size);
    for i in 0..size {
        let magnitude = (i as f64) * 0.001;
        // Alternate sign so the dataset is not monotone or all-positive.
        data.push(if i % 2 == 0 { magnitude } else { -magnitude });
    }
    data
}
/// Compute mean and sample standard deviation of dataset
///
/// Delegates to `aprender`'s vector statistics (inputs are downcast to
/// `f32` first — Phase 3: Statistics Migration) and applies Bessel's
/// correction to turn the population variance into a sample variance.
///
/// # Arguments
///
/// * `values` - Slice of f64 values
///
/// # Returns
///
/// Tuple of (mean, standard_deviation). Returns `(0.0, 0.0)` for an
/// empty slice, and a standard deviation of `0.0` for a single element.
///
/// # Example
///
/// ```rust
/// use pmat::services::analytics_backend::stats::mean_and_std;
///
/// let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let (mean, std) = mean_and_std(&values);
/// assert!((mean - 3.0).abs() < 0.01);
/// ```
pub fn mean_and_std(values: &[f64]) -> (f64, f64) {
    use aprender::primitives::Vector;
    if values.is_empty() {
        return (0.0, 0.0);
    }
    let n = values.len();
    // aprender operates on f32, so downcast before building the vector.
    let samples: Vec<f32> = values.iter().map(|&v| v as f32).collect();
    let vector = Vector::from_slice(&samples);
    let mean = vector.mean() as f64;
    // aprender's variance() divides by n (population variance); rescale
    // by n / (n - 1) to obtain the sample variance (Bessel's correction).
    let std_dev = match n {
        1 => 0.0,
        _ => (vector.variance() as f64 * n as f64 / (n - 1) as f64).sqrt(),
    };
    (mean, std_dev)
}
/// Compute average of dataset using specified backend
///
/// Dispatches to the GPU or SIMD implementation when the corresponding
/// cargo feature (`analytics-gpu` / `analytics-simd`) is compiled in;
/// `Backend::Scalar` is always available as the baseline.
///
/// # Arguments
///
/// * `dataset` - Slice of f64 values
/// * `backend` - Backend to use for computation
///
/// # Returns
///
/// Average value; `Ok(0.0)` for an empty dataset.
///
/// # Example
///
/// ```rust
/// use pmat::services::analytics_backend::{Backend, stats::compute_avg};
///
/// let dataset = vec![1.0, 2.0, 3.0, 4.0, 5.0];
/// let avg = compute_avg(&dataset, Backend::Scalar).unwrap();
/// assert!((avg - 3.0).abs() < 0.01);
/// ```
pub fn compute_avg(dataset: &[f64], backend: Backend) -> Result<f64> {
    // Guard here so the backend implementations (which divide the sum by
    // dataset.len()) never divide by zero.
    if dataset.is_empty() {
        return Ok(0.0);
    }
    match backend {
        // NOTE(review): this match has no catch-all arm, so the Gpu/Simd
        // variants of Backend are presumably cfg-gated the same way the
        // arms are — confirm against the enum definition, otherwise the
        // match is non-exhaustive when a feature is disabled.
        #[cfg(feature = "analytics-gpu")]
        Backend::Gpu => compute_avg_gpu(dataset),
        #[cfg(feature = "analytics-simd")]
        Backend::Simd => compute_avg_simd(dataset),
        Backend::Scalar => compute_avg_scalar(dataset),
    }
}
/// Scalar implementation of average (baseline)
///
/// Returns `Ok(0.0)` for an empty slice, matching `compute_avg`'s
/// contract. Without this guard an empty input would produce
/// `0.0 / 0.0 == NaN` for callers that reach this function directly
/// (e.g. via the SIMD placeholder path).
fn compute_avg_scalar(dataset: &[f64]) -> Result<f64> {
    if dataset.is_empty() {
        return Ok(0.0);
    }
    let sum: f64 = dataset.iter().sum();
    Ok(sum / dataset.len() as f64)
}
/// SIMD implementation using trueno
///
/// Currently a scalar passthrough: trueno does not yet expose a SIMD
/// sum, so this delegates to the baseline until `trueno::simd::sum()`
/// becomes available.
#[cfg(feature = "analytics-simd")]
fn compute_avg_simd(dataset: &[f64]) -> Result<f64> {
    compute_avg_scalar(dataset)
}
/// GPU implementation using wgpu compute shaders
///
/// Sums the dataset on-device, then performs the final division by the
/// element count on the host.
#[cfg(feature = "analytics-gpu")]
fn compute_avg_gpu(dataset: &[f64]) -> Result<f64> {
    use super::gpu::GpuDevice;
    // Device handle is cached globally, so repeated calls are cheap.
    let sum = GpuDevice::get_or_init()?.compute_sum(dataset)?;
    Ok(sum / dataset.len() as f64)
}