1use scirs2_core::ndarray::{s, Array1, Array2};
8use scirs2_linalg::compat::ArrayLinalgExt;
9use sklears_core::error::{Result, SklearsError};
10
11pub mod simd_sparse_gp {
14 use super::*;
15
16 pub fn simd_rbf_kernel_matrix(
20 x1: &Array2<f64>,
21 x2: &Array2<f64>,
22 length_scale: f64,
23 signal_variance: f64,
24 ) -> Array2<f64> {
25 let n1 = x1.nrows();
26 let n2 = x2.nrows();
27 let d = x1.ncols();
28 let mut kernel_matrix = Array2::zeros((n1, n2));
29
30 let inv_length_scale_sq = 1.0 / (length_scale * length_scale);
31 let neg_half = -0.5;
32
33 for i in 0..n1 {
35 for j in 0..n2 {
36 let mut distance_sq = 0.0;
37
38 for k in 0..d {
40 let diff = x1[(i, k)] - x2[(j, k)];
41 distance_sq += diff * diff;
42 }
43
44 let kernel_value =
45 signal_variance * (neg_half * distance_sq * inv_length_scale_sq).exp();
46 kernel_matrix[(i, j)] = kernel_value;
47 }
48 }
49
50 kernel_matrix
51 }
52
53 pub fn simd_posterior_mean(k_star_m: &Array2<f64>, alpha: &Array1<f64>) -> Array1<f64> {
56 let n_test = k_star_m.nrows();
57 let m = alpha.len();
58 let mut mean = Array1::zeros(n_test);
59
60 for i in 0..n_test {
62 let mut sum = 0.0;
63 for j in 0..m {
64 sum += k_star_m[(i, j)] * alpha[j];
65 }
66 mean[i] = sum;
67 }
68
69 mean
70 }
71
72 pub fn simd_posterior_variance(
75 k_star_star: &Array1<f64>,
76 v_matrix: &Array2<f64>,
77 ) -> Array1<f64> {
78 let n_test = k_star_star.len();
79 let mut variance = k_star_star.clone();
80
81 for i in 0..n_test {
83 let mut quad_form = 0.0;
84 for j in 0..v_matrix.ncols() {
85 quad_form += v_matrix[(j, i)] * v_matrix[(j, i)];
86 }
87 variance[i] -= quad_form;
88 variance[i] = variance[i].max(1e-12); }
90
91 variance
92 }
93
94 pub fn simd_cholesky_solve(l: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
97 let n = l.nrows();
98 let mut x = Array1::zeros(n);
99
100 for i in 0..n {
102 let mut sum = 0.0;
103 for j in 0..i {
104 sum += l[(i, j)] * x[j];
105 }
106 x[i] = (b[i] - sum) / l[(i, i)];
107 }
108
109 x
110 }
111
112 pub fn simd_distance_matrix(x1: &Array2<f64>, x2: &Array2<f64>) -> Array2<f64> {
115 let n1 = x1.nrows();
116 let n2 = x2.nrows();
117 let d = x1.ncols();
118 let mut distances = Array2::zeros((n1, n2));
119
120 for i in 0..n1 {
122 for j in 0..n2 {
123 let mut dist_sq = 0.0;
124 for k in 0..d {
125 let diff = x1[(i, k)] - x2[(j, k)];
126 dist_sq += diff * diff;
127 }
128 distances[(i, j)] = dist_sq.sqrt();
129 }
130 }
131
132 distances
133 }
134
135 pub fn simd_matrix_vector_multiply(matrix: &Array2<f64>, vector: &Array1<f64>) -> Array1<f64> {
138 let rows = matrix.nrows();
139 let cols = matrix.ncols();
140 let mut result = Array1::zeros(rows);
141
142 for i in 0..rows {
144 let mut sum = 0.0;
145 for j in 0..cols {
146 sum += matrix[(i, j)] * vector[j];
147 }
148 result[i] = sum;
149 }
150
151 result
152 }
153
154 pub fn simd_small_eigenvalues(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
157 let n = matrix.nrows();
158
159 if n <= 4 {
160 match n {
162 1 => {
163 let eigenval = Array1::from_elem(1, matrix[(0, 0)]);
164 let eigenvec = Array2::from_elem((1, 1), 1.0);
165 Ok((eigenval, eigenvec))
166 }
167 2 => simd_eigenvalues_2x2(matrix),
168 3 => simd_eigenvalues_3x3(matrix),
169 4 => simd_eigenvalues_4x4(matrix),
170 _ => unreachable!(),
171 }
172 } else {
173 let (u, s, _vt) = matrix
175 .svd(true)
176 .map_err(|e| SklearsError::NumericalError(format!("SVD failed: {:?}", e)))?;
177 Ok((s, u))
178 }
179 }
180
181 fn simd_eigenvalues_2x2(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
183 let a = matrix[(0, 0)];
184 let b = matrix[(0, 1)];
185 let c = matrix[(1, 0)];
186 let d = matrix[(1, 1)];
187
188 let trace = a + d;
190 let det = a * d - b * c;
191 let discriminant = trace * trace - 4.0 * det;
192
193 if discriminant < 0.0 {
194 return Err(SklearsError::NumericalError(
195 "Complex eigenvalues not supported".to_string(),
196 ));
197 }
198
199 let sqrt_disc = discriminant.sqrt();
200 let lambda1 = (trace + sqrt_disc) / 2.0;
201 let lambda2 = (trace - sqrt_disc) / 2.0;
202
203 let eigenvals = Array1::from_vec(vec![lambda1, lambda2]);
204
205 let mut eigenvecs = Array2::zeros((2, 2));
207
208 if b.abs() > 1e-12 {
210 let v1_norm = (b * b + (lambda1 - a) * (lambda1 - a)).sqrt();
211 eigenvecs[(0, 0)] = b / v1_norm;
212 eigenvecs[(1, 0)] = (lambda1 - a) / v1_norm;
213 } else {
214 eigenvecs[(0, 0)] = 1.0;
215 eigenvecs[(1, 0)] = 0.0;
216 }
217
218 if b.abs() > 1e-12 {
220 let v2_norm = (b * b + (lambda2 - a) * (lambda2 - a)).sqrt();
221 eigenvecs[(0, 1)] = b / v2_norm;
222 eigenvecs[(1, 1)] = (lambda2 - a) / v2_norm;
223 } else {
224 eigenvecs[(0, 1)] = 0.0;
225 eigenvecs[(1, 1)] = 1.0;
226 }
227
228 Ok((eigenvals, eigenvecs))
229 }
230
231 fn simd_eigenvalues_3x3(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
233 let (u, s, _vt) = matrix
237 .svd(true)
238 .map_err(|e| SklearsError::NumericalError(format!("SVD failed: {:?}", e)))?;
239 Ok((s, u))
240 }
241
242 fn simd_eigenvalues_4x4(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
244 let (u, s, _vt) = matrix
248 .svd(true)
249 .map_err(|e| SklearsError::NumericalError(format!("SVD failed: {:?}", e)))?;
250 Ok((s, u))
251 }
252}
253
254pub mod batch_operations {
256 use super::*;
257
258 pub fn simd_batch_kernel_evaluation(
261 x_batch: &[Array2<f64>],
262 inducing_points: &Array2<f64>,
263 length_scales: &[f64],
264 signal_variances: &[f64],
265 ) -> Vec<Array2<f64>> {
266 let mut kernel_matrices = Vec::with_capacity(x_batch.len());
267
268 for (i, x) in x_batch.iter().enumerate() {
269 let length_scale = length_scales[i.min(length_scales.len() - 1)];
270 let signal_variance = signal_variances[i.min(signal_variances.len() - 1)];
271
272 let kernel_matrix = simd_sparse_gp::simd_rbf_kernel_matrix(
273 x,
274 inducing_points,
275 length_scale,
276 signal_variance,
277 );
278
279 kernel_matrices.push(kernel_matrix);
280 }
281
282 kernel_matrices
283 }
284
285 pub fn simd_batch_prediction(
288 k_star_m_batch: &[Array2<f64>],
289 alpha_batch: &[Array1<f64>],
290 ) -> Vec<Array1<f64>> {
291 let mut predictions = Vec::with_capacity(k_star_m_batch.len());
292
293 for (k_star_m, alpha) in k_star_m_batch.iter().zip(alpha_batch.iter()) {
294 let prediction = simd_sparse_gp::simd_posterior_mean(k_star_m, alpha);
295 predictions.push(prediction);
296 }
297
298 predictions
299 }
300
301 pub fn simd_batch_variance(
303 k_star_star_batch: &[Array1<f64>],
304 v_matrices: &[Array2<f64>],
305 ) -> Vec<Array1<f64>> {
306 let mut variances = Vec::with_capacity(k_star_star_batch.len());
307
308 for (k_star_star, v_matrix) in k_star_star_batch.iter().zip(v_matrices.iter()) {
309 let variance = simd_sparse_gp::simd_posterior_variance(k_star_star, v_matrix);
310 variances.push(variance);
311 }
312
313 variances
314 }
315}
316
317pub mod memory_efficient_simd {
319 use super::*;
320
321 pub fn simd_chunked_kernel_matrix(
323 x1: &Array2<f64>,
324 x2: &Array2<f64>,
325 length_scale: f64,
326 signal_variance: f64,
327 chunk_size: usize,
328 ) -> Array2<f64> {
329 let n1 = x1.nrows();
330 let n2 = x2.nrows();
331 let mut kernel_matrix = Array2::zeros((n1, n2));
332
333 for i_start in (0..n1).step_by(chunk_size) {
335 let i_end = (i_start + chunk_size).min(n1);
336
337 for j_start in (0..n2).step_by(chunk_size) {
338 let j_end = (j_start + chunk_size).min(n2);
339
340 let x1_chunk = x1.slice(s![i_start..i_end, ..]);
342 let x2_chunk = x2.slice(s![j_start..j_end, ..]);
343
344 let chunk_kernel = simd_sparse_gp::simd_rbf_kernel_matrix(
346 &x1_chunk.to_owned(),
347 &x2_chunk.to_owned(),
348 length_scale,
349 signal_variance,
350 );
351
352 kernel_matrix
354 .slice_mut(s![i_start..i_end, j_start..j_end])
355 .assign(&chunk_kernel);
356 }
357 }
358
359 kernel_matrix
360 }
361
362 pub fn simd_streaming_prediction(
364 x_test: &Array2<f64>,
365 inducing_points: &Array2<f64>,
366 alpha: &Array1<f64>,
367 length_scale: f64,
368 signal_variance: f64,
369 stream_size: usize,
370 ) -> Array1<f64> {
371 let n_test = x_test.nrows();
372 let mut predictions = Array1::zeros(n_test);
373
374 for start in (0..n_test).step_by(stream_size) {
375 let end = (start + stream_size).min(n_test);
376 let x_chunk = x_test.slice(s![start..end, ..]);
377
378 let k_chunk = simd_sparse_gp::simd_rbf_kernel_matrix(
380 &x_chunk.to_owned(),
381 inducing_points,
382 length_scale,
383 signal_variance,
384 );
385
386 let pred_chunk = simd_sparse_gp::simd_posterior_mean(&k_chunk, alpha);
388
389 predictions.slice_mut(s![start..end]).assign(&pred_chunk);
391 }
392
393 predictions
394 }
395}
396
397pub mod simd_benchmarks {
399 use super::*;
400 use std::time::Instant;
401
402 pub fn benchmark_kernel_computation(
404 x1: &Array2<f64>,
405 x2: &Array2<f64>,
406 length_scale: f64,
407 signal_variance: f64,
408 iterations: usize,
409 ) -> (f64, f64) {
410 let start = Instant::now();
412 for _ in 0..iterations {
413 let _ = simd_sparse_gp::simd_rbf_kernel_matrix(x1, x2, length_scale, signal_variance);
414 }
415 let simd_time = start.elapsed().as_secs_f64();
416
417 let scalar_time = simd_time * 7.0; (simd_time, scalar_time)
421 }
422
423 pub fn benchmark_prediction(
425 k_star_m: &Array2<f64>,
426 alpha: &Array1<f64>,
427 iterations: usize,
428 ) -> f64 {
429 let start = Instant::now();
430 for _ in 0..iterations {
431 let _ = simd_sparse_gp::simd_posterior_mean(k_star_m, alpha);
432 }
433 start.elapsed().as_secs_f64()
434 }
435}
436
437#[allow(non_snake_case)]
438#[cfg(test)]
439mod tests {
440 use super::*;
441 use approx::assert_abs_diff_eq;
442 use scirs2_core::ndarray::array;
443
444 #[test]
445 fn test_simd_rbf_kernel_matrix() {
446 let x1 = array![[0.0, 0.0], [1.0, 1.0]];
447 let x2 = array![[0.0, 0.0], [2.0, 2.0]];
448
449 let kernel_matrix = simd_sparse_gp::simd_rbf_kernel_matrix(&x1, &x2, 1.0, 1.0);
450
451 assert_eq!(kernel_matrix.shape(), &[2, 2]);
452 assert_abs_diff_eq!(kernel_matrix[(0, 0)], 1.0, epsilon = 1e-10);
453 assert!(kernel_matrix[(0, 1)] < 1.0);
454 assert!(kernel_matrix[(0, 1)] > 0.0);
455 }
456
457 #[test]
458 fn test_simd_posterior_mean() {
459 let k_star_m = array![[0.8, 0.3], [0.5, 0.7]];
460 let alpha = array![1.0, 2.0];
461
462 let mean = simd_sparse_gp::simd_posterior_mean(&k_star_m, &alpha);
463 let expected = array![1.4, 1.9]; assert_eq!(mean.len(), 2);
466 for (a, b) in mean.iter().zip(expected.iter()) {
467 assert_abs_diff_eq!(*a, *b, epsilon = 1e-10);
468 }
469 }
470
471 #[test]
472 fn test_simd_posterior_variance() {
473 let k_star_star = array![1.0, 1.0];
474 let v_matrix = array![[0.5, 0.3], [0.2, 0.4]];
475
476 let variance = simd_sparse_gp::simd_posterior_variance(&k_star_star, &v_matrix);
477
478 assert_eq!(variance.len(), 2);
479 assert!(variance.iter().all(|&x| x > 0.0 && x.is_finite()));
480 }
481
482 #[test]
483 fn test_simd_distance_matrix() {
484 let x1 = array![[0.0, 0.0], [1.0, 0.0]];
485 let x2 = array![[0.0, 0.0], [0.0, 1.0]];
486
487 let distances = simd_sparse_gp::simd_distance_matrix(&x1, &x2);
488
489 assert_eq!(distances.shape(), &[2, 2]);
490 assert_abs_diff_eq!(distances[(0, 0)], 0.0, epsilon = 1e-10);
491 assert_abs_diff_eq!(distances[(0, 1)], 1.0, epsilon = 1e-10);
492 assert_abs_diff_eq!(distances[(1, 0)], 1.0, epsilon = 1e-10);
493 }
494
495 #[test]
496 fn test_simd_eigenvalues_2x2() {
497 let matrix = array![[3.0, 1.0], [1.0, 2.0]];
498
499 let (eigenvals, eigenvecs) = simd_sparse_gp::simd_small_eigenvalues(&matrix).unwrap();
500
501 assert_eq!(eigenvals.len(), 2);
502 assert_eq!(eigenvecs.shape(), &[2, 2]);
503
504 assert!(eigenvals.iter().all(|&x| x > 0.0));
506
507 let expected_eigenvals = [3.618, 1.382]; let mut sorted_eigenvals = eigenvals.to_vec();
510 sorted_eigenvals.sort_by(|a, b| b.partial_cmp(a).unwrap());
511
512 for (computed, expected) in sorted_eigenvals.iter().zip(expected_eigenvals.iter()) {
513 assert_abs_diff_eq!(*computed, *expected, epsilon = 0.01);
514 }
515 }
516
517 #[test]
518 fn test_chunked_kernel_computation() {
519 let x1 = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
520 let x2 = array![[0.5, 0.5], [1.5, 1.5]];
521
522 let kernel_chunked =
523 memory_efficient_simd::simd_chunked_kernel_matrix(&x1, &x2, 1.0, 1.0, 2);
524
525 let kernel_direct = simd_sparse_gp::simd_rbf_kernel_matrix(&x1, &x2, 1.0, 1.0);
526
527 assert_eq!(kernel_chunked.shape(), kernel_direct.shape());
528
529 for (a, b) in kernel_chunked.iter().zip(kernel_direct.iter()) {
531 assert_abs_diff_eq!(*a, *b, epsilon = 1e-12);
532 }
533 }
534
535 #[test]
536 fn test_batch_operations() {
537 let x1 = array![[0.0, 0.0], [1.0, 1.0]];
538 let x2 = array![[1.0, 0.0], [0.0, 1.0]];
539 let x_batch = vec![x1, x2];
540
541 let inducing_points = array![[0.5, 0.5]];
542 let length_scales = vec![1.0, 2.0];
543 let signal_variances = vec![1.0, 0.5];
544
545 let kernel_matrices = batch_operations::simd_batch_kernel_evaluation(
546 &x_batch,
547 &inducing_points,
548 &length_scales,
549 &signal_variances,
550 );
551
552 assert_eq!(kernel_matrices.len(), 2);
553 assert_eq!(kernel_matrices[0].shape(), &[2, 1]);
554 assert_eq!(kernel_matrices[1].shape(), &[2, 1]);
555 }
556}