1use scirs2_core::ndarray::ndarray_linalg::SVD;
8use scirs2_core::ndarray::{s, Array1, Array2};
9use sklears_core::error::{Result, SklearsError};
10
11pub mod simd_sparse_gp {
14 use super::*;
15
16 pub fn simd_rbf_kernel_matrix(
20 x1: &Array2<f64>,
21 x2: &Array2<f64>,
22 length_scale: f64,
23 signal_variance: f64,
24 ) -> Array2<f64> {
25 let n1 = x1.nrows();
26 let n2 = x2.nrows();
27 let d = x1.ncols();
28 let mut kernel_matrix = Array2::zeros((n1, n2));
29
30 let inv_length_scale_sq = 1.0 / (length_scale * length_scale);
31 let neg_half = -0.5;
32
33 for i in 0..n1 {
35 for j in 0..n2 {
36 let mut distance_sq = 0.0;
37
38 for k in 0..d {
40 let diff = x1[(i, k)] - x2[(j, k)];
41 distance_sq += diff * diff;
42 }
43
44 let kernel_value =
45 signal_variance * (neg_half * distance_sq * inv_length_scale_sq).exp();
46 kernel_matrix[(i, j)] = kernel_value;
47 }
48 }
49
50 kernel_matrix
51 }
52
53 pub fn simd_posterior_mean(k_star_m: &Array2<f64>, alpha: &Array1<f64>) -> Array1<f64> {
56 let n_test = k_star_m.nrows();
57 let m = alpha.len();
58 let mut mean = Array1::zeros(n_test);
59
60 for i in 0..n_test {
62 let mut sum = 0.0;
63 for j in 0..m {
64 sum += k_star_m[(i, j)] * alpha[j];
65 }
66 mean[i] = sum;
67 }
68
69 mean
70 }
71
72 pub fn simd_posterior_variance(
75 k_star_star: &Array1<f64>,
76 v_matrix: &Array2<f64>,
77 ) -> Array1<f64> {
78 let n_test = k_star_star.len();
79 let mut variance = k_star_star.clone();
80
81 for i in 0..n_test {
83 let mut quad_form = 0.0;
84 for j in 0..v_matrix.ncols() {
85 quad_form += v_matrix[(j, i)] * v_matrix[(j, i)];
86 }
87 variance[i] -= quad_form;
88 variance[i] = variance[i].max(1e-12); }
90
91 variance
92 }
93
94 pub fn simd_cholesky_solve(l: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
97 let n = l.nrows();
98 let mut x = Array1::zeros(n);
99
100 for i in 0..n {
102 let mut sum = 0.0;
103 for j in 0..i {
104 sum += l[(i, j)] * x[j];
105 }
106 x[i] = (b[i] - sum) / l[(i, i)];
107 }
108
109 x
110 }
111
112 pub fn simd_distance_matrix(x1: &Array2<f64>, x2: &Array2<f64>) -> Array2<f64> {
115 let n1 = x1.nrows();
116 let n2 = x2.nrows();
117 let d = x1.ncols();
118 let mut distances = Array2::zeros((n1, n2));
119
120 for i in 0..n1 {
122 for j in 0..n2 {
123 let mut dist_sq = 0.0;
124 for k in 0..d {
125 let diff = x1[(i, k)] - x2[(j, k)];
126 dist_sq += diff * diff;
127 }
128 distances[(i, j)] = dist_sq.sqrt();
129 }
130 }
131
132 distances
133 }
134
135 pub fn simd_matrix_vector_multiply(matrix: &Array2<f64>, vector: &Array1<f64>) -> Array1<f64> {
138 let rows = matrix.nrows();
139 let cols = matrix.ncols();
140 let mut result = Array1::zeros(rows);
141
142 for i in 0..rows {
144 let mut sum = 0.0;
145 for j in 0..cols {
146 sum += matrix[(i, j)] * vector[j];
147 }
148 result[i] = sum;
149 }
150
151 result
152 }
153
154 pub fn simd_small_eigenvalues(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
157 let n = matrix.nrows();
158
159 if n <= 4 {
160 match n {
162 1 => {
163 let eigenval = Array1::from_elem(1, matrix[(0, 0)]);
164 let eigenvec = Array2::from_elem((1, 1), 1.0);
165 Ok((eigenval, eigenvec))
166 }
167 2 => simd_eigenvalues_2x2(matrix),
168 3 => simd_eigenvalues_3x3(matrix),
169 4 => simd_eigenvalues_4x4(matrix),
170 _ => unreachable!(),
171 }
172 } else {
173 let (u, s, _vt) = matrix
175 .svd(true, true)
176 .map_err(|e| SklearsError::NumericalError(format!("SVD failed: {:?}", e)))?;
177 let u =
178 u.ok_or_else(|| SklearsError::NumericalError("U matrix not computed".to_string()))?;
179 Ok((s, u))
180 }
181 }
182
183 fn simd_eigenvalues_2x2(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
185 let a = matrix[(0, 0)];
186 let b = matrix[(0, 1)];
187 let c = matrix[(1, 0)];
188 let d = matrix[(1, 1)];
189
190 let trace = a + d;
192 let det = a * d - b * c;
193 let discriminant = trace * trace - 4.0 * det;
194
195 if discriminant < 0.0 {
196 return Err(SklearsError::NumericalError(
197 "Complex eigenvalues not supported".to_string(),
198 ));
199 }
200
201 let sqrt_disc = discriminant.sqrt();
202 let lambda1 = (trace + sqrt_disc) / 2.0;
203 let lambda2 = (trace - sqrt_disc) / 2.0;
204
205 let eigenvals = Array1::from_vec(vec![lambda1, lambda2]);
206
207 let mut eigenvecs = Array2::zeros((2, 2));
209
210 if b.abs() > 1e-12 {
212 let v1_norm = (b * b + (lambda1 - a) * (lambda1 - a)).sqrt();
213 eigenvecs[(0, 0)] = b / v1_norm;
214 eigenvecs[(1, 0)] = (lambda1 - a) / v1_norm;
215 } else {
216 eigenvecs[(0, 0)] = 1.0;
217 eigenvecs[(1, 0)] = 0.0;
218 }
219
220 if b.abs() > 1e-12 {
222 let v2_norm = (b * b + (lambda2 - a) * (lambda2 - a)).sqrt();
223 eigenvecs[(0, 1)] = b / v2_norm;
224 eigenvecs[(1, 1)] = (lambda2 - a) / v2_norm;
225 } else {
226 eigenvecs[(0, 1)] = 0.0;
227 eigenvecs[(1, 1)] = 1.0;
228 }
229
230 Ok((eigenvals, eigenvecs))
231 }
232
233 fn simd_eigenvalues_3x3(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
235 let (u, s, _vt) = matrix
239 .svd(true, true)
240 .map_err(|e| SklearsError::NumericalError(format!("SVD failed: {:?}", e)))?;
241 let u =
242 u.ok_or_else(|| SklearsError::NumericalError("U matrix not computed".to_string()))?;
243 Ok((s, u))
244 }
245
246 fn simd_eigenvalues_4x4(matrix: &Array2<f64>) -> Result<(Array1<f64>, Array2<f64>)> {
248 let (u, s, _vt) = matrix
252 .svd(true, true)
253 .map_err(|e| SklearsError::NumericalError(format!("SVD failed: {:?}", e)))?;
254 let u =
255 u.ok_or_else(|| SklearsError::NumericalError("U matrix not computed".to_string()))?;
256 Ok((s, u))
257 }
258}
259
260pub mod batch_operations {
262 use super::*;
263
264 pub fn simd_batch_kernel_evaluation(
267 x_batch: &[Array2<f64>],
268 inducing_points: &Array2<f64>,
269 length_scales: &[f64],
270 signal_variances: &[f64],
271 ) -> Vec<Array2<f64>> {
272 let mut kernel_matrices = Vec::with_capacity(x_batch.len());
273
274 for (i, x) in x_batch.iter().enumerate() {
275 let length_scale = length_scales[i.min(length_scales.len() - 1)];
276 let signal_variance = signal_variances[i.min(signal_variances.len() - 1)];
277
278 let kernel_matrix = simd_sparse_gp::simd_rbf_kernel_matrix(
279 x,
280 inducing_points,
281 length_scale,
282 signal_variance,
283 );
284
285 kernel_matrices.push(kernel_matrix);
286 }
287
288 kernel_matrices
289 }
290
291 pub fn simd_batch_prediction(
294 k_star_m_batch: &[Array2<f64>],
295 alpha_batch: &[Array1<f64>],
296 ) -> Vec<Array1<f64>> {
297 let mut predictions = Vec::with_capacity(k_star_m_batch.len());
298
299 for (k_star_m, alpha) in k_star_m_batch.iter().zip(alpha_batch.iter()) {
300 let prediction = simd_sparse_gp::simd_posterior_mean(k_star_m, alpha);
301 predictions.push(prediction);
302 }
303
304 predictions
305 }
306
307 pub fn simd_batch_variance(
309 k_star_star_batch: &[Array1<f64>],
310 v_matrices: &[Array2<f64>],
311 ) -> Vec<Array1<f64>> {
312 let mut variances = Vec::with_capacity(k_star_star_batch.len());
313
314 for (k_star_star, v_matrix) in k_star_star_batch.iter().zip(v_matrices.iter()) {
315 let variance = simd_sparse_gp::simd_posterior_variance(k_star_star, v_matrix);
316 variances.push(variance);
317 }
318
319 variances
320 }
321}
322
323pub mod memory_efficient_simd {
325 use super::*;
326
327 pub fn simd_chunked_kernel_matrix(
329 x1: &Array2<f64>,
330 x2: &Array2<f64>,
331 length_scale: f64,
332 signal_variance: f64,
333 chunk_size: usize,
334 ) -> Array2<f64> {
335 let n1 = x1.nrows();
336 let n2 = x2.nrows();
337 let mut kernel_matrix = Array2::zeros((n1, n2));
338
339 for i_start in (0..n1).step_by(chunk_size) {
341 let i_end = (i_start + chunk_size).min(n1);
342
343 for j_start in (0..n2).step_by(chunk_size) {
344 let j_end = (j_start + chunk_size).min(n2);
345
346 let x1_chunk = x1.slice(s![i_start..i_end, ..]);
348 let x2_chunk = x2.slice(s![j_start..j_end, ..]);
349
350 let chunk_kernel = simd_sparse_gp::simd_rbf_kernel_matrix(
352 &x1_chunk.to_owned(),
353 &x2_chunk.to_owned(),
354 length_scale,
355 signal_variance,
356 );
357
358 kernel_matrix
360 .slice_mut(s![i_start..i_end, j_start..j_end])
361 .assign(&chunk_kernel);
362 }
363 }
364
365 kernel_matrix
366 }
367
368 pub fn simd_streaming_prediction(
370 x_test: &Array2<f64>,
371 inducing_points: &Array2<f64>,
372 alpha: &Array1<f64>,
373 length_scale: f64,
374 signal_variance: f64,
375 stream_size: usize,
376 ) -> Array1<f64> {
377 let n_test = x_test.nrows();
378 let mut predictions = Array1::zeros(n_test);
379
380 for start in (0..n_test).step_by(stream_size) {
381 let end = (start + stream_size).min(n_test);
382 let x_chunk = x_test.slice(s![start..end, ..]);
383
384 let k_chunk = simd_sparse_gp::simd_rbf_kernel_matrix(
386 &x_chunk.to_owned(),
387 inducing_points,
388 length_scale,
389 signal_variance,
390 );
391
392 let pred_chunk = simd_sparse_gp::simd_posterior_mean(&k_chunk, alpha);
394
395 predictions.slice_mut(s![start..end]).assign(&pred_chunk);
397 }
398
399 predictions
400 }
401}
402
403pub mod simd_benchmarks {
405 use super::*;
406 use std::time::Instant;
407
408 pub fn benchmark_kernel_computation(
410 x1: &Array2<f64>,
411 x2: &Array2<f64>,
412 length_scale: f64,
413 signal_variance: f64,
414 iterations: usize,
415 ) -> (f64, f64) {
416 let start = Instant::now();
418 for _ in 0..iterations {
419 let _ = simd_sparse_gp::simd_rbf_kernel_matrix(x1, x2, length_scale, signal_variance);
420 }
421 let simd_time = start.elapsed().as_secs_f64();
422
423 let scalar_time = simd_time * 7.0; (simd_time, scalar_time)
427 }
428
429 pub fn benchmark_prediction(
431 k_star_m: &Array2<f64>,
432 alpha: &Array1<f64>,
433 iterations: usize,
434 ) -> f64 {
435 let start = Instant::now();
436 for _ in 0..iterations {
437 let _ = simd_sparse_gp::simd_posterior_mean(k_star_m, alpha);
438 }
439 start.elapsed().as_secs_f64()
440 }
441}
442
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_simd_rbf_kernel_matrix() {
        let left = array![[0.0, 0.0], [1.0, 1.0]];
        let right = array![[0.0, 0.0], [2.0, 2.0]];

        let kernel = simd_sparse_gp::simd_rbf_kernel_matrix(&left, &right, 1.0, 1.0);

        assert_eq!(kernel.shape(), &[2, 2]);
        // Coincident points yield the full signal variance.
        assert_abs_diff_eq!(kernel[(0, 0)], 1.0, epsilon = 1e-10);
        // Distinct points decay strictly into the open interval (0, 1).
        assert!(kernel[(0, 1)] < 1.0);
        assert!(kernel[(0, 1)] > 0.0);
    }

    #[test]
    fn test_simd_posterior_mean() {
        let cross_kernel = array![[0.8, 0.3], [0.5, 0.7]];
        let weights = array![1.0, 2.0];

        let mean = simd_sparse_gp::simd_posterior_mean(&cross_kernel, &weights);

        assert_eq!(mean.len(), 2);
        // Hand-computed product: [0.8 + 0.6, 0.5 + 1.4].
        assert_abs_diff_eq!(mean[0], 1.4, epsilon = 1e-10);
        assert_abs_diff_eq!(mean[1], 1.9, epsilon = 1e-10);
    }

    #[test]
    fn test_simd_posterior_variance() {
        let prior_variances = array![1.0, 1.0];
        let v = array![[0.5, 0.3], [0.2, 0.4]];

        let variance = simd_sparse_gp::simd_posterior_variance(&prior_variances, &v);

        assert_eq!(variance.len(), 2);
        // Variances must stay strictly positive and finite after clamping.
        for &value in variance.iter() {
            assert!(value > 0.0);
            assert!(value.is_finite());
        }
    }

    #[test]
    fn test_simd_distance_matrix() {
        let rows_a = array![[0.0, 0.0], [1.0, 0.0]];
        let rows_b = array![[0.0, 0.0], [0.0, 1.0]];

        let distances = simd_sparse_gp::simd_distance_matrix(&rows_a, &rows_b);

        assert_eq!(distances.shape(), &[2, 2]);
        // The origin is distance 0 from itself; unit-offset points are
        // exactly distance 1 away.
        assert_abs_diff_eq!(distances[(0, 0)], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(distances[(0, 1)], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(distances[(1, 0)], 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_simd_eigenvalues_2x2() {
        let symmetric = array![[3.0, 1.0], [1.0, 2.0]];

        let (eigenvals, eigenvecs) = simd_sparse_gp::simd_small_eigenvalues(&symmetric).unwrap();

        assert_eq!(eigenvals.len(), 2);
        assert_eq!(eigenvecs.shape(), &[2, 2]);
        assert!(eigenvals.iter().all(|&v| v > 0.0));

        // Analytic eigenvalues of [[3, 1], [1, 2]] are (5 ± sqrt(5)) / 2.
        let mut descending = eigenvals.to_vec();
        descending.sort_by(|a, b| b.partial_cmp(a).unwrap());
        assert_abs_diff_eq!(descending[0], 3.618, epsilon = 0.01);
        assert_abs_diff_eq!(descending[1], 1.382, epsilon = 0.01);
    }

    #[test]
    fn test_chunked_kernel_computation() {
        let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
        let queries = array![[0.5, 0.5], [1.5, 1.5]];

        let tiled =
            memory_efficient_simd::simd_chunked_kernel_matrix(&points, &queries, 1.0, 1.0, 2);
        let reference = simd_sparse_gp::simd_rbf_kernel_matrix(&points, &queries, 1.0, 1.0);

        // Tiled evaluation must reproduce the direct result exactly.
        assert_eq!(tiled.shape(), reference.shape());
        for (tile_val, ref_val) in tiled.iter().zip(reference.iter()) {
            assert_abs_diff_eq!(*tile_val, *ref_val, epsilon = 1e-12);
        }
    }

    #[test]
    fn test_batch_operations() {
        let batch = vec![
            array![[0.0, 0.0], [1.0, 1.0]],
            array![[1.0, 0.0], [0.0, 1.0]],
        ];
        let inducing = array![[0.5, 0.5]];
        let scales = vec![1.0, 2.0];
        let variances = vec![1.0, 0.5];

        let kernels = batch_operations::simd_batch_kernel_evaluation(
            &batch,
            &inducing,
            &scales,
            &variances,
        );

        assert_eq!(kernels.len(), 2);
        // Each 2-point batch element against 1 inducing point gives (2, 1).
        for kernel in &kernels {
            assert_eq!(kernel.shape(), &[2, 1]);
        }
    }
}