Skip to main content

cliffy_gpu/
simd.rs

1//! SIMD-optimized CPU operations for geometric algebra.
2//!
3//! This module provides CPU-based SIMD acceleration for geometric algebra
4//! operations when GPU is unavailable or for small batch sizes where GPU
5//! overhead would be too high.
6//!
7//! Uses the `wide` crate for portable SIMD that works on x86, ARM, and WASM.
8
9use wide::f32x8;
10
11use crate::GpuMultivector;
12use cliffy_core::GA3;
13
14/// SIMD-optimized geometric product for Cl(3,0).
15///
16/// Computes the geometric product of two multivectors using SIMD
17/// operations where possible. The Cl(3,0) geometric product follows
18/// the multiplication table for basis elements {1, e1, e2, e12, e3, e13, e23, e123}.
19pub fn geometric_product_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
20    // For Cl(3,0), the geometric product expands to:
21    // result[i] = sum over j,k where basis[j] * basis[k] = ±basis[i]
22    //
23    // The sign table for Cl(3,0) is:
24    // e1*e1 = 1, e2*e2 = 1, e3*e3 = 1
25    // e1*e2 = e12, e2*e1 = -e12
26    // e1*e3 = e13, e3*e1 = -e13
27    // e2*e3 = e23, e3*e2 = -e23
28    // etc.
29
30    // Scalar component: 1*1 + e1*e1 + e2*e2 + e12*e12 + e3*e3 + e13*e13 + e23*e23 + e123*e123
31    // Note: e12*e12 = e1*e2*e1*e2 = -e1*e1*e2*e2 = -1
32    //       e123*e123 = e1*e2*e3*e1*e2*e3 = -1
33    let c0 = a.coeffs[0] * b.coeffs[0]   // 1*1
34           + a.coeffs[1] * b.coeffs[1]   // e1*e1 = 1
35           + a.coeffs[2] * b.coeffs[2]   // e2*e2 = 1
36           - a.coeffs[3] * b.coeffs[3]   // e12*e12 = -1
37           + a.coeffs[4] * b.coeffs[4]   // e3*e3 = 1
38           - a.coeffs[5] * b.coeffs[5]   // e13*e13 = -1
39           - a.coeffs[6] * b.coeffs[6]   // e23*e23 = -1
40           - a.coeffs[7] * b.coeffs[7]; // e123*e123 = -1
41
42    // e1 component
43    let c1 = a.coeffs[0] * b.coeffs[1]   // 1*e1
44           + a.coeffs[1] * b.coeffs[0]   // e1*1
45           - a.coeffs[2] * b.coeffs[3]   // e2*e12 = -e1
46           + a.coeffs[3] * b.coeffs[2]   // e12*e2 = e1
47           - a.coeffs[4] * b.coeffs[5]   // e3*e13 = -e1
48           + a.coeffs[5] * b.coeffs[4]   // e13*e3 = e1
49           + a.coeffs[6] * b.coeffs[7]   // e23*e123 = e1
50           - a.coeffs[7] * b.coeffs[6]; // e123*e23 = -e1
51
52    // e2 component
53    let c2 = a.coeffs[0] * b.coeffs[2]   // 1*e2
54           + a.coeffs[1] * b.coeffs[3]   // e1*e12 = e2
55           + a.coeffs[2] * b.coeffs[0]   // e2*1
56           - a.coeffs[3] * b.coeffs[1]   // e12*e1 = -e2
57           - a.coeffs[4] * b.coeffs[6]   // e3*e23 = -e2
58           - a.coeffs[5] * b.coeffs[7]   // e13*e123 = -e2
59           + a.coeffs[6] * b.coeffs[4]   // e23*e3 = e2
60           + a.coeffs[7] * b.coeffs[5]; // e123*e13 = e2
61
62    // e12 component
63    let c3 = a.coeffs[0] * b.coeffs[3]   // 1*e12
64           + a.coeffs[1] * b.coeffs[2]   // e1*e2 = e12
65           - a.coeffs[2] * b.coeffs[1]   // e2*e1 = -e12
66           + a.coeffs[3] * b.coeffs[0]   // e12*1
67           + a.coeffs[4] * b.coeffs[7]   // e3*e123 = e12
68           + a.coeffs[5] * b.coeffs[6]   // e13*e23 = e12
69           - a.coeffs[6] * b.coeffs[5]   // e23*e13 = -e12
70           + a.coeffs[7] * b.coeffs[4]; // e123*e3 = e12
71
72    // e3 component
73    let c4 = a.coeffs[0] * b.coeffs[4]   // 1*e3
74           + a.coeffs[1] * b.coeffs[5]   // e1*e13 = e3
75           + a.coeffs[2] * b.coeffs[6]   // e2*e23 = e3
76           + a.coeffs[3] * b.coeffs[7]   // e12*e123 = e3
77           + a.coeffs[4] * b.coeffs[0]   // e3*1
78           - a.coeffs[5] * b.coeffs[1]   // e13*e1 = -e3
79           - a.coeffs[6] * b.coeffs[2]   // e23*e2 = -e3
80           - a.coeffs[7] * b.coeffs[3]; // e123*e12 = -e3
81
82    // e13 component
83    let c5 = a.coeffs[0] * b.coeffs[5]   // 1*e13
84           + a.coeffs[1] * b.coeffs[4]   // e1*e3 = e13
85           - a.coeffs[2] * b.coeffs[7]   // e2*e123 = -e13
86           - a.coeffs[3] * b.coeffs[6]   // e12*e23 = -e13
87           - a.coeffs[4] * b.coeffs[1]   // e3*e1 = -e13
88           + a.coeffs[5] * b.coeffs[0]   // e13*1
89           + a.coeffs[6] * b.coeffs[3]   // e23*e12 = e13
90           + a.coeffs[7] * b.coeffs[2]; // e123*e2 = e13
91
92    // e23 component
93    let c6 = a.coeffs[0] * b.coeffs[6]   // 1*e23
94           + a.coeffs[1] * b.coeffs[7]   // e1*e123 = e23
95           + a.coeffs[2] * b.coeffs[4]   // e2*e3 = e23
96           + a.coeffs[3] * b.coeffs[5]   // e12*e13 = e23
97           - a.coeffs[4] * b.coeffs[2]   // e3*e2 = -e23
98           - a.coeffs[5] * b.coeffs[3]   // e13*e12 = -e23
99           + a.coeffs[6] * b.coeffs[0]   // e23*1
100           - a.coeffs[7] * b.coeffs[1]; // e123*e1 = -e23
101
102    // e123 component
103    let c7 = a.coeffs[0] * b.coeffs[7]   // 1*e123
104           + a.coeffs[1] * b.coeffs[6]   // e1*e23 = e123
105           - a.coeffs[2] * b.coeffs[5]   // e2*e13 = -e123
106           + a.coeffs[3] * b.coeffs[4]   // e12*e3 = e123
107           + a.coeffs[4] * b.coeffs[3]   // e3*e12 = e123
108           - a.coeffs[5] * b.coeffs[2]   // e13*e2 = -e123
109           + a.coeffs[6] * b.coeffs[1]   // e23*e1 = e123
110           + a.coeffs[7] * b.coeffs[0]; // e123*1
111
112    GpuMultivector {
113        coeffs: [c0, c1, c2, c3, c4, c5, c6, c7],
114    }
115}
116
117/// SIMD-optimized addition of two multivectors.
118#[inline]
119pub fn addition_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
120    let a_vec = f32x8::from(a.coeffs);
121    let b_vec = f32x8::from(b.coeffs);
122    let result = a_vec + b_vec;
123    GpuMultivector {
124        coeffs: result.to_array(),
125    }
126}
127
128/// SIMD-optimized subtraction of two multivectors.
129#[inline]
130pub fn subtraction_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
131    let a_vec = f32x8::from(a.coeffs);
132    let b_vec = f32x8::from(b.coeffs);
133    let result = a_vec - b_vec;
134    GpuMultivector {
135        coeffs: result.to_array(),
136    }
137}
138
139/// SIMD-optimized scalar multiplication.
140#[inline]
141pub fn scalar_mul_simd(a: &GpuMultivector, scalar: f32) -> GpuMultivector {
142    let a_vec = f32x8::from(a.coeffs);
143    let s_vec = f32x8::splat(scalar);
144    let result = a_vec * s_vec;
145    GpuMultivector {
146        coeffs: result.to_array(),
147    }
148}
149
150/// SIMD-optimized reverse (reversion) of a multivector.
151///
152/// For Cl(3,0), the reverse negates bivector and pseudoscalar components:
153/// ~(a + b*e12 + c*e13 + d*e23 + e*e123) = a - b*e12 - c*e13 - d*e23 - e*e123
154#[inline]
155pub fn reverse_simd(a: &GpuMultivector) -> GpuMultivector {
156    // Reverse: negate grades 2 and 3
157    // Grade 0 (scalar): coeffs[0] - keep
158    // Grade 1 (vectors): coeffs[1,2,4] - keep
159    // Grade 2 (bivectors): coeffs[3,5,6] - negate
160    // Grade 3 (pseudoscalar): coeffs[7] - negate
161    let signs = f32x8::from([1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, -1.0]);
162    let a_vec = f32x8::from(a.coeffs);
163    let result = a_vec * signs;
164    GpuMultivector {
165        coeffs: result.to_array(),
166    }
167}
168
169/// SIMD-optimized grade involution (main involution).
170///
171/// Negates odd-grade components.
172#[inline]
173pub fn grade_involution_simd(a: &GpuMultivector) -> GpuMultivector {
174    // Grade involution: negate odd grades
175    // Grade 0: keep, Grade 1: negate, Grade 2: keep, Grade 3: negate
176    let signs = f32x8::from([1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0]);
177    let a_vec = f32x8::from(a.coeffs);
178    let result = a_vec * signs;
179    GpuMultivector {
180        coeffs: result.to_array(),
181    }
182}
183
184/// SIMD-optimized conjugate (Clifford conjugate).
185///
186/// Combines reversion and grade involution.
187#[inline]
188pub fn conjugate_simd(a: &GpuMultivector) -> GpuMultivector {
189    // Conjugate = reverse ∘ grade_involution
190    // Grade 0: keep, Grade 1: negate, Grade 2: negate, Grade 3: keep
191    let signs = f32x8::from([1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0]);
192    let a_vec = f32x8::from(a.coeffs);
193    let result = a_vec * signs;
194    GpuMultivector {
195        coeffs: result.to_array(),
196    }
197}
198
199/// SIMD-optimized squared magnitude (norm squared).
200#[inline]
201pub fn norm_squared_simd(a: &GpuMultivector) -> f32 {
202    // ||a||² = a * ~a (scalar part only)
203    let rev = reverse_simd(a);
204    let prod = geometric_product_simd(a, &rev);
205    prod.coeffs[0]
206}
207
208/// SIMD-optimized norm (magnitude).
209#[inline]
210pub fn norm_simd(a: &GpuMultivector) -> f32 {
211    norm_squared_simd(a).sqrt()
212}
213
214/// SIMD-optimized normalization.
215#[inline]
216pub fn normalize_simd(a: &GpuMultivector) -> GpuMultivector {
217    let n = norm_simd(a);
218    if n > 1e-10 {
219        scalar_mul_simd(a, 1.0 / n)
220    } else {
221        GpuMultivector::zero()
222    }
223}
224
225/// SIMD-optimized sandwich product: b * a * ~b
226///
227/// Used for rotations and reflections.
228#[inline]
229pub fn sandwich_simd(a: &GpuMultivector, b: &GpuMultivector) -> GpuMultivector {
230    let b_rev = reverse_simd(b);
231    let temp = geometric_product_simd(b, a);
232    geometric_product_simd(&temp, &b_rev)
233}
234
235/// SIMD-optimized dot product (inner product).
236#[inline]
237pub fn dot_simd(a: &GpuMultivector, b: &GpuMultivector) -> f32 {
238    let a_vec = f32x8::from(a.coeffs);
239    let b_vec = f32x8::from(b.coeffs);
240    let prod = a_vec * b_vec;
241    // Sum all elements
242    prod.reduce_add()
243}
244
245/// SIMD-optimized exponential for bivectors (creates rotors).
246///
247/// For a bivector B, exp(B) = cos(|B|) + sin(|B|) * B/|B|
248pub fn exp_bivector_simd(b: &GpuMultivector) -> GpuMultivector {
249    // Extract bivector components
250    let bx = b.coeffs[3]; // e12
251    let by = b.coeffs[5]; // e13
252    let bz = b.coeffs[6]; // e23
253
254    // Compute magnitude of bivector
255    let mag_sq = bx * bx + by * by + bz * bz;
256    let mag = mag_sq.sqrt();
257
258    if mag < 1e-10 {
259        // For small bivector, exp(B) ≈ 1 + B
260        let mut result = GpuMultivector::scalar(1.0);
261        result.coeffs[3] = bx;
262        result.coeffs[5] = by;
263        result.coeffs[6] = bz;
264        return result;
265    }
266
267    let cos_mag = mag.cos();
268    let sin_mag_over_mag = mag.sin() / mag;
269
270    GpuMultivector {
271        coeffs: [
272            cos_mag,
273            0.0,
274            0.0,
275            bx * sin_mag_over_mag,
276            0.0,
277            by * sin_mag_over_mag,
278            bz * sin_mag_over_mag,
279            0.0,
280        ],
281    }
282}
283
284/// SIMD-optimized linear interpolation.
285#[inline]
286pub fn lerp_simd(a: &GpuMultivector, b: &GpuMultivector, t: f32) -> GpuMultivector {
287    let a_vec = f32x8::from(a.coeffs);
288    let b_vec = f32x8::from(b.coeffs);
289    let t_vec = f32x8::splat(t);
290    let one_minus_t = f32x8::splat(1.0 - t);
291    let result = a_vec * one_minus_t + b_vec * t_vec;
292    GpuMultivector {
293        coeffs: result.to_array(),
294    }
295}
296
/// SIMD-optimized rotor spherical linear interpolation (SLERP).
///
/// Interpolates smoothly between two rotors, returning
/// `a * exp(t * log(~a * b))`.
///
/// NOTE(review): assumes `a` and `b` are unit rotors (even-grade,
/// norm 1); the log extraction below relies on `~a * b` having the
/// form cos(θ) + sin(θ)B with B a unit bivector — confirm callers
/// uphold this.
pub fn rotor_slerp_simd(a: &GpuMultivector, b: &GpuMultivector, t: f32) -> GpuMultivector {
    // Compute dot product to find angle between rotors
    let dot = dot_simd(a, b);

    // Clamp dot product to valid range
    let dot = dot.clamp(-1.0, 1.0);

    // If rotors are very close, use linear interpolation
    // (nlerp: lerp then renormalize), which is numerically safer than
    // dividing by a near-zero sin(θ) below.
    // NOTE(review): when dot < 0 the general path interpolates along
    // the longer arc (no hemisphere flip is applied) — confirm this is
    // intended.
    if dot.abs() > 0.9995 {
        let result = lerp_simd(a, b, t);
        return normalize_simd(&result);
    }

    // Compute interpolation using geometric algebra
    // slerp(a, b, t) = a * exp(t * log(~a * b))
    let a_rev = reverse_simd(a);
    let ratio = geometric_product_simd(&a_rev, b);

    // Extract bivector part for log
    // For a rotor r = cos(θ) + sin(θ)B, log(r) = θB
    let scalar = ratio.coeffs[0];
    let theta = scalar.clamp(-1.0, 1.0).acos();

    // Identical rotors: the log is zero, so the result is just `a`.
    if theta.abs() < 1e-10 {
        return *a;
    }

    // Scale the bivector by t*theta/sin(theta):
    // the bivector coefficients of `ratio` carry a factor sin(θ), so
    // multiplying by θ/sin(θ) recovers θB, and the factor t folds the
    // interpolation parameter into the same pass.
    let scale = t * theta / theta.sin();
    let mut log_ratio = GpuMultivector::zero();
    log_ratio.coeffs[3] = ratio.coeffs[3] * scale;
    log_ratio.coeffs[5] = ratio.coeffs[5] * scale;
    log_ratio.coeffs[6] = ratio.coeffs[6] * scale;

    let exp_log = exp_bivector_simd(&log_ratio);
    geometric_product_simd(a, &exp_log)
}
337
338/// Batch processing context for SIMD operations.
339///
340/// Provides efficient batch processing of geometric algebra operations
341/// using SIMD acceleration.
342pub struct SimdBatch;
343
344impl SimdBatch {
345    /// Batch geometric product using SIMD.
346    pub fn geometric_product(a: &[GpuMultivector], b: &[GpuMultivector]) -> Vec<GpuMultivector> {
347        a.iter()
348            .zip(b.iter())
349            .map(|(a, b)| geometric_product_simd(a, b))
350            .collect()
351    }
352
353    /// Batch addition using SIMD.
354    pub fn addition(a: &[GpuMultivector], b: &[GpuMultivector]) -> Vec<GpuMultivector> {
355        a.iter()
356            .zip(b.iter())
357            .map(|(a, b)| addition_simd(a, b))
358            .collect()
359    }
360
361    /// Batch sandwich product using SIMD.
362    pub fn sandwich(rotors: &[GpuMultivector], vectors: &[GpuMultivector]) -> Vec<GpuMultivector> {
363        rotors
364            .iter()
365            .zip(vectors.iter())
366            .map(|(r, v)| sandwich_simd(v, r))
367            .collect()
368    }
369
370    /// Batch exponential using SIMD.
371    pub fn exp(bivectors: &[GpuMultivector]) -> Vec<GpuMultivector> {
372        bivectors.iter().map(exp_bivector_simd).collect()
373    }
374
375    /// Batch rotor SLERP using SIMD.
376    pub fn rotor_slerp(a: &[GpuMultivector], b: &[GpuMultivector], t: f32) -> Vec<GpuMultivector> {
377        a.iter()
378            .zip(b.iter())
379            .map(|(a, b)| rotor_slerp_simd(a, b, t))
380            .collect()
381    }
382
383    /// Batch normalize using SIMD.
384    pub fn normalize(mvs: &[GpuMultivector]) -> Vec<GpuMultivector> {
385        mvs.iter().map(normalize_simd).collect()
386    }
387
388    /// Convert from GA3 to GPU format.
389    pub fn from_ga3(mvs: &[GA3]) -> Vec<GpuMultivector> {
390        mvs.iter().map(|mv| mv.into()).collect()
391    }
392
393    /// Convert from GPU format to GA3.
394    pub fn to_ga3(mvs: &[GpuMultivector]) -> Vec<GA3> {
395        mvs.iter().map(|mv| (*mv).into()).collect()
396    }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_addition_simd() {
        let a = GpuMultivector::vector(1.0, 2.0, 3.0);
        let b = GpuMultivector::vector(4.0, 5.0, 6.0);
        let result = addition_simd(&a, &b);
        assert_eq!(result.get_vector(), (5.0, 7.0, 9.0));
    }

    #[test]
    fn test_subtraction_simd() {
        let a = GpuMultivector::vector(5.0, 7.0, 9.0);
        let b = GpuMultivector::vector(1.0, 2.0, 3.0);
        let result = subtraction_simd(&a, &b);
        assert_eq!(result.get_vector(), (4.0, 5.0, 6.0));
    }

    #[test]
    fn test_scalar_mul_simd() {
        let a = GpuMultivector::vector(1.0, 2.0, 3.0);
        let result = scalar_mul_simd(&a, 2.0);
        assert_eq!(result.get_vector(), (2.0, 4.0, 6.0));
    }

    #[test]
    fn test_geometric_product_scalars() {
        let a = GpuMultivector::scalar(3.0);
        let b = GpuMultivector::scalar(4.0);
        let result = geometric_product_simd(&a, &b);
        assert!((result.get_scalar() - 12.0).abs() < 1e-6);
    }

    #[test]
    fn test_geometric_product_vectors() {
        // e1 * e1 = 1
        let e1 = GpuMultivector {
            coeffs: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        };
        let result = geometric_product_simd(&e1, &e1);
        assert!((result.get_scalar() - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_geometric_product_bivector() {
        // e1 * e2 = e12
        let e1 = GpuMultivector {
            coeffs: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        };
        let e2 = GpuMultivector {
            coeffs: [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        };
        let result = geometric_product_simd(&e1, &e2);
        assert!((result.coeffs[3] - 1.0).abs() < 1e-6); // e12 component
    }

    #[test]
    fn test_reverse_simd() {
        let mut a = GpuMultivector::zero();
        a.coeffs[0] = 1.0; // scalar
        a.coeffs[1] = 2.0; // e1
        a.coeffs[3] = 3.0; // e12 (bivector)
        a.coeffs[7] = 4.0; // e123 (pseudoscalar)

        let rev = reverse_simd(&a);
        assert_eq!(rev.coeffs[0], 1.0); // scalar unchanged
        assert_eq!(rev.coeffs[1], 2.0); // vector unchanged
        assert_eq!(rev.coeffs[3], -3.0); // bivector negated
        assert_eq!(rev.coeffs[7], -4.0); // pseudoscalar negated
    }

    #[test]
    fn test_grade_involution_simd() {
        let mut a = GpuMultivector::zero();
        a.coeffs[0] = 1.0; // scalar (grade 0)
        a.coeffs[1] = 2.0; // e1 (grade 1)
        a.coeffs[3] = 3.0; // e12 (grade 2)
        a.coeffs[7] = 4.0; // e123 (grade 3)

        let inv = grade_involution_simd(&a);
        assert_eq!(inv.coeffs[0], 1.0); // even grade unchanged
        assert_eq!(inv.coeffs[1], -2.0); // odd grade negated
        assert_eq!(inv.coeffs[3], 3.0); // even grade unchanged
        assert_eq!(inv.coeffs[7], -4.0); // odd grade negated
    }

    #[test]
    fn test_conjugate_simd() {
        let mut a = GpuMultivector::zero();
        a.coeffs[0] = 1.0; // scalar (grade 0)
        a.coeffs[1] = 2.0; // e1 (grade 1)
        a.coeffs[3] = 3.0; // e12 (grade 2)
        a.coeffs[7] = 4.0; // e123 (grade 3)

        let conj = conjugate_simd(&a);
        assert_eq!(conj.coeffs[0], 1.0); // grade 0 unchanged
        assert_eq!(conj.coeffs[1], -2.0); // grade 1 negated
        assert_eq!(conj.coeffs[3], -3.0); // grade 2 negated
        assert_eq!(conj.coeffs[7], 4.0); // grade 3 unchanged
    }

    #[test]
    fn test_dot_simd() {
        // Coefficient-wise dot product: 1*4 + 2*5 + 3*6 = 32.
        let a = GpuMultivector::vector(1.0, 2.0, 3.0);
        let b = GpuMultivector::vector(4.0, 5.0, 6.0);
        assert!((dot_simd(&a, &b) - 32.0).abs() < 1e-6);
    }

    #[test]
    fn test_norm_simd() {
        let v = GpuMultivector::vector(3.0, 4.0, 0.0);
        let n = norm_simd(&v);
        assert!((n - 5.0).abs() < 1e-6);
    }

    #[test]
    fn test_normalize_simd() {
        let v = GpuMultivector::vector(3.0, 4.0, 0.0);
        let normalized = normalize_simd(&v);
        let (x, y, z) = normalized.get_vector();
        assert!((x - 0.6).abs() < 1e-6);
        assert!((y - 0.8).abs() < 1e-6);
        assert!(z.abs() < 1e-6);
    }

    #[test]
    fn test_lerp_simd() {
        let a = GpuMultivector::scalar(0.0);
        let b = GpuMultivector::scalar(10.0);
        let mid = lerp_simd(&a, &b, 0.5);
        assert!((mid.get_scalar() - 5.0).abs() < 1e-6);
    }

    #[test]
    fn test_exp_bivector_small() {
        // Small bivector: exp(B) ≈ 1 + B
        let mut b = GpuMultivector::zero();
        b.coeffs[3] = 0.001; // small e12 component
        let result = exp_bivector_simd(&b);
        assert!((result.coeffs[0] - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_sandwich_identity() {
        // Sandwich with identity rotor should preserve the vector
        let rotor = GpuMultivector::scalar(1.0);
        let v = GpuMultivector::vector(1.0, 2.0, 3.0);
        let result = sandwich_simd(&v, &rotor);
        let (x, y, z) = result.get_vector();
        assert!((x - 1.0).abs() < 1e-6);
        assert!((y - 2.0).abs() < 1e-6);
        assert!((z - 3.0).abs() < 1e-6);
    }

    #[test]
    fn test_batch_operations() {
        let a = vec![GpuMultivector::scalar(1.0), GpuMultivector::scalar(2.0)];
        let b = vec![GpuMultivector::scalar(3.0), GpuMultivector::scalar(4.0)];

        let results = SimdBatch::addition(&a, &b);
        assert!((results[0].get_scalar() - 4.0).abs() < 1e-6);
        assert!((results[1].get_scalar() - 6.0).abs() < 1e-6);
    }
}