1use wide::f32x8;
10
11use crate::GpuMultivector;
12use cliffy_core::GA3;
13
14pub fn geometric_product_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
20 let c0 = a.coeffs[0] * b.coeffs[0] + a.coeffs[1] * b.coeffs[1] + a.coeffs[2] * b.coeffs[2] - a.coeffs[3] * b.coeffs[3] + a.coeffs[4] * b.coeffs[4] - a.coeffs[5] * b.coeffs[5] - a.coeffs[6] * b.coeffs[6] - a.coeffs[7] * b.coeffs[7]; let c1 = a.coeffs[0] * b.coeffs[1] + a.coeffs[1] * b.coeffs[0] - a.coeffs[2] * b.coeffs[3] + a.coeffs[3] * b.coeffs[2] - a.coeffs[4] * b.coeffs[5] + a.coeffs[5] * b.coeffs[4] + a.coeffs[6] * b.coeffs[7] - a.coeffs[7] * b.coeffs[6]; let c2 = a.coeffs[0] * b.coeffs[2] + a.coeffs[1] * b.coeffs[3] + a.coeffs[2] * b.coeffs[0] - a.coeffs[3] * b.coeffs[1] - a.coeffs[4] * b.coeffs[6] - a.coeffs[5] * b.coeffs[7] + a.coeffs[6] * b.coeffs[4] + a.coeffs[7] * b.coeffs[5]; let c3 = a.coeffs[0] * b.coeffs[3] + a.coeffs[1] * b.coeffs[2] - a.coeffs[2] * b.coeffs[1] + a.coeffs[3] * b.coeffs[0] + a.coeffs[4] * b.coeffs[7] + a.coeffs[5] * b.coeffs[6] - a.coeffs[6] * b.coeffs[5] + a.coeffs[7] * b.coeffs[4]; let c4 = a.coeffs[0] * b.coeffs[4] + a.coeffs[1] * b.coeffs[5] + a.coeffs[2] * b.coeffs[6] + a.coeffs[3] * b.coeffs[7] + a.coeffs[4] * b.coeffs[0] - a.coeffs[5] * b.coeffs[1] - a.coeffs[6] * b.coeffs[2] - a.coeffs[7] * b.coeffs[3]; let c5 = a.coeffs[0] * b.coeffs[5] + a.coeffs[1] * b.coeffs[4] - a.coeffs[2] * b.coeffs[7] - a.coeffs[3] * b.coeffs[6] - a.coeffs[4] * b.coeffs[1] + a.coeffs[5] * b.coeffs[0] + a.coeffs[6] * b.coeffs[3] + a.coeffs[7] * b.coeffs[2]; let c6 = a.coeffs[0] * b.coeffs[6] + a.coeffs[1] * b.coeffs[7] + a.coeffs[2] * b.coeffs[4] + a.coeffs[3] * b.coeffs[5] - a.coeffs[4] * b.coeffs[2] - a.coeffs[5] * b.coeffs[3] + a.coeffs[6] * b.coeffs[0] - a.coeffs[7] * b.coeffs[1]; let c7 = a.coeffs[0] * b.coeffs[7] + a.coeffs[1] * b.coeffs[6] - a.coeffs[2] * b.coeffs[5] + a.coeffs[3] * b.coeffs[4] + a.coeffs[4] * b.coeffs[3] - a.coeffs[5] * b.coeffs[2] + a.coeffs[6] * b.coeffs[1] + a.coeffs[7] * b.coeffs[0]; GpuMultivector {
113 coeffs: [c0, c1, c2, c3, c4, c5, c6, c7],
114 }
115}
116
117#[inline]
119pub fn addition_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
120 let a_vec = f32x8::from(a.coeffs);
121 let b_vec = f32x8::from(b.coeffs);
122 let result = a_vec + b_vec;
123 GpuMultivector {
124 coeffs: result.to_array(),
125 }
126}
127
128#[inline]
130pub fn subtraction_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
131 let a_vec = f32x8::from(a.coeffs);
132 let b_vec = f32x8::from(b.coeffs);
133 let result = a_vec - b_vec;
134 GpuMultivector {
135 coeffs: result.to_array(),
136 }
137}
138
139#[inline]
141pub fn scalar_mul_simd(a: &GpuMultivector, scalar: f32) -> GpuMultivector {
142 let a_vec = f32x8::from(a.coeffs);
143 let s_vec = f32x8::splat(scalar);
144 let result = a_vec * s_vec;
145 GpuMultivector {
146 coeffs: result.to_array(),
147 }
148}
149
150#[inline]
155pub fn reverse_simd(a: &GpuMultivector) -> GpuMultivector {
156 let signs = f32x8::from([1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, -1.0]);
162 let a_vec = f32x8::from(a.coeffs);
163 let result = a_vec * signs;
164 GpuMultivector {
165 coeffs: result.to_array(),
166 }
167}
168
169#[inline]
173pub fn grade_involution_simd(a: &GpuMultivector) -> GpuMultivector {
174 let signs = f32x8::from([1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0]);
177 let a_vec = f32x8::from(a.coeffs);
178 let result = a_vec * signs;
179 GpuMultivector {
180 coeffs: result.to_array(),
181 }
182}
183
184#[inline]
188pub fn conjugate_simd(a: &GpuMultivector) -> GpuMultivector {
189 let signs = f32x8::from([1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0]);
192 let a_vec = f32x8::from(a.coeffs);
193 let result = a_vec * signs;
194 GpuMultivector {
195 coeffs: result.to_array(),
196 }
197}
198
199#[inline]
201pub fn norm_squared_simd(a: &GpuMultivector) -> f32 {
202 let rev = reverse_simd(a);
204 let prod = geometric_product_simd(a, &rev);
205 prod.coeffs[0]
206}
207
208#[inline]
210pub fn norm_simd(a: &GpuMultivector) -> f32 {
211 norm_squared_simd(a).sqrt()
212}
213
214#[inline]
216pub fn normalize_simd(a: &GpuMultivector) -> GpuMultivector {
217 let n = norm_simd(a);
218 if n > 1e-10 {
219 scalar_mul_simd(a, 1.0 / n)
220 } else {
221 GpuMultivector::zero()
222 }
223}
224
225#[inline]
229pub fn sandwich_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
230 let b_rev = reverse_simd(b);
231 let temp = geometric_product_simd(b, a);
232 geometric_product_simd(&temp, &b_rev)
233}
234
235#[inline]
237pub fn dot_simd(a: &GpuMultivector, b: &GpuMultivector) -> f32 {
238 let a_vec = f32x8::from(a.coeffs);
239 let b_vec = f32x8::from(b.coeffs);
240 let prod = a_vec * b_vec;
241 prod.reduce_add()
243}
244
245pub fn exp_bivector_simd(b: &GpuMultivector) -> GpuMultivector {
249 let bx = b.coeffs[3]; let by = b.coeffs[5]; let bz = b.coeffs[6]; let mag_sq = bx * bx + by * by + bz * bz;
256 let mag = mag_sq.sqrt();
257
258 if mag < 1e-10 {
259 let mut result = GpuMultivector::scalar(1.0);
261 result.coeffs[3] = bx;
262 result.coeffs[5] = by;
263 result.coeffs[6] = bz;
264 return result;
265 }
266
267 let cos_mag = mag.cos();
268 let sin_mag_over_mag = mag.sin() / mag;
269
270 GpuMultivector {
271 coeffs: [
272 cos_mag,
273 0.0,
274 0.0,
275 bx * sin_mag_over_mag,
276 0.0,
277 by * sin_mag_over_mag,
278 bz * sin_mag_over_mag,
279 0.0,
280 ],
281 }
282}
283
284#[inline]
286pub fn lerp_simd(a: &GpuMultivector, b: &GpuMultivector, t: f32) -> GpuMultivector {
287 let a_vec = f32x8::from(a.coeffs);
288 let b_vec = f32x8::from(b.coeffs);
289 let t_vec = f32x8::splat(t);
290 let one_minus_t = f32x8::splat(1.0 - t);
291 let result = a_vec * one_minus_t + b_vec * t_vec;
292 GpuMultivector {
293 coeffs: result.to_array(),
294 }
295}
296
297pub fn rotor_slerp_simd(a: &GpuMultivector, b: &GpuMultivector, t: f32) -> GpuMultivector {
301 let dot = dot_simd(a, b);
303
304 let dot = dot.clamp(-1.0, 1.0);
306
307 if dot.abs() > 0.9995 {
309 let result = lerp_simd(a, b, t);
310 return normalize_simd(&result);
311 }
312
313 let a_rev = reverse_simd(a);
316 let ratio = geometric_product_simd(&a_rev, b);
317
318 let scalar = ratio.coeffs[0];
321 let theta = scalar.clamp(-1.0, 1.0).acos();
322
323 if theta.abs() < 1e-10 {
324 return *a;
325 }
326
327 let scale = t * theta / theta.sin();
329 let mut log_ratio = GpuMultivector::zero();
330 log_ratio.coeffs[3] = ratio.coeffs[3] * scale;
331 log_ratio.coeffs[5] = ratio.coeffs[5] * scale;
332 log_ratio.coeffs[6] = ratio.coeffs[6] * scale;
333
334 let exp_log = exp_bivector_simd(&log_ratio);
335 geometric_product_simd(a, &exp_log)
336}
337
338pub struct SimdBatch;
343
344impl SimdBatch {
345 pub fn geometric_product(a: &[GpuMultivector], b: &[GpuMultivector]) -> Vec<GpuMultivector> {
347 a.iter()
348 .zip(b.iter())
349 .map(|(a, b)| geometric_product_simd(a, b))
350 .collect()
351 }
352
353 pub fn addition(a: &[GpuMultivector], b: &[GpuMultivector]) -> Vec<GpuMultivector> {
355 a.iter()
356 .zip(b.iter())
357 .map(|(a, b)| addition_simd(a, b))
358 .collect()
359 }
360
361 pub fn sandwich(rotors: &[GpuMultivector], vectors: &[GpuMultivector]) -> Vec<GpuMultivector> {
363 rotors
364 .iter()
365 .zip(vectors.iter())
366 .map(|(r, v)| sandwich_simd(v, r))
367 .collect()
368 }
369
370 pub fn exp(bivectors: &[GpuMultivector]) -> Vec<GpuMultivector> {
372 bivectors.iter().map(exp_bivector_simd).collect()
373 }
374
375 pub fn rotor_slerp(a: &[GpuMultivector], b: &[GpuMultivector], t: f32) -> Vec<GpuMultivector> {
377 a.iter()
378 .zip(b.iter())
379 .map(|(a, b)| rotor_slerp_simd(a, b, t))
380 .collect()
381 }
382
383 pub fn normalize(mvs: &[GpuMultivector]) -> Vec<GpuMultivector> {
385 mvs.iter().map(normalize_simd).collect()
386 }
387
388 pub fn from_ga3(mvs: &[GA3]) -> Vec<GpuMultivector> {
390 mvs.iter().map(|mv| mv.into()).collect()
391 }
392
393 pub fn to_ga3(mvs: &[GpuMultivector]) -> Vec<GA3> {
395 mvs.iter().map(|mv| (*mv).into()).collect()
396 }
397}
398
399#[cfg(test)]
400mod tests {
401 use super::*;
402
403 #[test]
404 fn test_addition_simd() {
405 let a = GpuMultivector::vector(1.0, 2.0, 3.0);
406 let b = GpuMultivector::vector(4.0, 5.0, 6.0);
407 let result = addition_simd(&a, &b);
408 assert_eq!(result.get_vector(), (5.0, 7.0, 9.0));
409 }
410
411 #[test]
412 fn test_subtraction_simd() {
413 let a = GpuMultivector::vector(5.0, 7.0, 9.0);
414 let b = GpuMultivector::vector(1.0, 2.0, 3.0);
415 let result = subtraction_simd(&a, &b);
416 assert_eq!(result.get_vector(), (4.0, 5.0, 6.0));
417 }
418
419 #[test]
420 fn test_scalar_mul_simd() {
421 let a = GpuMultivector::vector(1.0, 2.0, 3.0);
422 let result = scalar_mul_simd(&a, 2.0);
423 assert_eq!(result.get_vector(), (2.0, 4.0, 6.0));
424 }
425
426 #[test]
427 fn test_geometric_product_scalars() {
428 let a = GpuMultivector::scalar(3.0);
429 let b = GpuMultivector::scalar(4.0);
430 let result = geometric_product_simd(&a, &b);
431 assert!((result.get_scalar() - 12.0).abs() < 1e-6);
432 }
433
434 #[test]
435 fn test_geometric_product_vectors() {
436 let e1 = GpuMultivector {
438 coeffs: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
439 };
440 let result = geometric_product_simd(&e1, &e1);
441 assert!((result.get_scalar() - 1.0).abs() < 1e-6);
442 }
443
444 #[test]
445 fn test_geometric_product_bivector() {
446 let e1 = GpuMultivector {
448 coeffs: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
449 };
450 let e2 = GpuMultivector {
451 coeffs: [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
452 };
453 let result = geometric_product_simd(&e1, &e2);
454 assert!((result.coeffs[3] - 1.0).abs() < 1e-6); }
456
457 #[test]
458 fn test_reverse_simd() {
459 let mut a = GpuMultivector::zero();
460 a.coeffs[0] = 1.0; a.coeffs[1] = 2.0; a.coeffs[3] = 3.0; a.coeffs[7] = 4.0; let rev = reverse_simd(&a);
466 assert_eq!(rev.coeffs[0], 1.0); assert_eq!(rev.coeffs[1], 2.0); assert_eq!(rev.coeffs[3], -3.0); assert_eq!(rev.coeffs[7], -4.0); }
471
472 #[test]
473 fn test_norm_simd() {
474 let v = GpuMultivector::vector(3.0, 4.0, 0.0);
475 let n = norm_simd(&v);
476 assert!((n - 5.0).abs() < 1e-6);
477 }
478
479 #[test]
480 fn test_normalize_simd() {
481 let v = GpuMultivector::vector(3.0, 4.0, 0.0);
482 let normalized = normalize_simd(&v);
483 let (x, y, z) = normalized.get_vector();
484 assert!((x - 0.6).abs() < 1e-6);
485 assert!((y - 0.8).abs() < 1e-6);
486 assert!(z.abs() < 1e-6);
487 }
488
489 #[test]
490 fn test_lerp_simd() {
491 let a = GpuMultivector::scalar(0.0);
492 let b = GpuMultivector::scalar(10.0);
493 let mid = lerp_simd(&a, &b, 0.5);
494 assert!((mid.get_scalar() - 5.0).abs() < 1e-6);
495 }
496
497 #[test]
498 fn test_exp_bivector_small() {
499 let mut b = GpuMultivector::zero();
501 b.coeffs[3] = 0.001; let result = exp_bivector_simd(&b);
503 assert!((result.coeffs[0] - 1.0).abs() < 0.01);
504 }
505
506 #[test]
507 fn test_sandwich_identity() {
508 let rotor = GpuMultivector::scalar(1.0);
510 let v = GpuMultivector::vector(1.0, 2.0, 3.0);
511 let result = sandwich_simd(&v, &rotor);
512 let (x, y, z) = result.get_vector();
513 assert!((x - 1.0).abs() < 1e-6);
514 assert!((y - 2.0).abs() < 1e-6);
515 assert!((z - 3.0).abs() < 1e-6);
516 }
517
518 #[test]
519 fn test_batch_operations() {
520 let a = vec![GpuMultivector::scalar(1.0), GpuMultivector::scalar(2.0)];
521 let b = vec![GpuMultivector::scalar(3.0), GpuMultivector::scalar(4.0)];
522
523 let results = SimdBatch::addition(&a, &b);
524 assert!((results[0].get_scalar() - 4.0).abs() < 1e-6);
525 assert!((results[1].get_scalar() - 6.0).abs() < 1e-6);
526 }
527}