amari_gpu/
shaders.rs

1//! WebGPU compute shader library for mathematical operations
2//!
3//! This module contains optimized WGSL compute shaders for various mathematical
4//! domains including tropical algebra, automatic differentiation, and fusion systems.
5
6use std::collections::HashMap;
7
8/// Shader library managing all mathematical compute shaders
9pub struct ShaderLibrary {
10    shaders: HashMap<String, &'static str>,
11}
12
13impl ShaderLibrary {
14    /// Create new shader library with all mathematical shaders
15    pub fn new() -> Self {
16        let mut shaders = HashMap::new();
17
18        // Tropical algebra shaders
19        shaders.insert(
20            "tropical_matrix_multiply".to_string(),
21            TROPICAL_MATRIX_MULTIPLY,
22        );
23        shaders.insert("tropical_vector_add".to_string(), TROPICAL_VECTOR_ADD);
24        shaders.insert(
25            "tropical_neural_network".to_string(),
26            TROPICAL_NEURAL_NETWORK,
27        );
28
29        // Dual number shaders
30        shaders.insert("dual_forward_ad".to_string(), DUAL_FORWARD_AD);
31        shaders.insert("dual_batch_gradient".to_string(), DUAL_BATCH_GRADIENT);
32        shaders.insert("dual_chain_rule".to_string(), DUAL_CHAIN_RULE);
33
34        // Fusion system shaders
35        shaders.insert("tropical_dual_clifford".to_string(), TROPICAL_DUAL_CLIFFORD);
36        shaders.insert("fusion_attention".to_string(), FUSION_ATTENTION);
37
38        // Information geometry shaders
39        shaders.insert("fisher_information".to_string(), FISHER_INFORMATION);
40        shaders.insert("kl_divergence_batch".to_string(), KL_DIVERGENCE_BATCH);
41
42        // Cellular automata shaders
43        shaders.insert("ca_evolution".to_string(), CA_EVOLUTION);
44        shaders.insert("ca_self_assembly".to_string(), CA_SELF_ASSEMBLY);
45        shaders.insert("rule_application".to_string(), RULE_APPLICATION);
46        shaders.insert("energy_calculation".to_string(), ENERGY_CALCULATION);
47        shaders.insert("neighbor_extraction".to_string(), NEIGHBOR_EXTRACTION);
48
49        // Enumerative geometry shaders
50        shaders.insert("intersection_theory".to_string(), INTERSECTION_THEORY);
51        shaders.insert("schubert_calculus".to_string(), SCHUBERT_CALCULUS);
52
53        // Holographic memory shaders
54        shaders.insert("holographic_batch_bind".to_string(), HOLOGRAPHIC_BATCH_BIND);
55        shaders.insert(
56            "holographic_batch_similarity".to_string(),
57            HOLOGRAPHIC_BATCH_SIMILARITY,
58        );
59        shaders.insert("holographic_bundle_all".to_string(), HOLOGRAPHIC_BUNDLE_ALL);
60        shaders.insert(
61            "holographic_resonator_step".to_string(),
62            HOLOGRAPHIC_RESONATOR_STEP,
63        );
64
65        Self { shaders }
66    }
67
68    /// Get shader source by name
69    pub fn get_shader(&self, name: &str) -> Option<&'static str> {
70        self.shaders.get(name).copied()
71    }
72
73    /// List all available shaders
74    pub fn list_shaders(&self) -> Vec<String> {
75        self.shaders.keys().cloned().collect()
76    }
77}
78
79impl Default for ShaderLibrary {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85/// Tropical algebra shader collection
86pub const TROPICAL_SHADERS: &[(&str, &str)] = &[
87    ("tropical_matrix_multiply", TROPICAL_MATRIX_MULTIPLY),
88    ("tropical_vector_add", TROPICAL_VECTOR_ADD),
89    ("tropical_neural_network", TROPICAL_NEURAL_NETWORK),
90];
91
92/// Dual number shader collection
93pub const DUAL_SHADERS: &[(&str, &str)] = &[
94    ("dual_forward_ad", DUAL_FORWARD_AD),
95    ("dual_batch_gradient", DUAL_BATCH_GRADIENT),
96    ("dual_chain_rule", DUAL_CHAIN_RULE),
97];
98
99/// Fusion system shader collection
100pub const FUSION_SHADERS: &[(&str, &str)] = &[
101    ("tropical_dual_clifford", TROPICAL_DUAL_CLIFFORD),
102    ("fusion_attention", FUSION_ATTENTION),
103];
104
105/// Holographic memory shader collection
106pub const HOLOGRAPHIC_SHADERS: &[(&str, &str)] = &[
107    ("holographic_batch_bind", HOLOGRAPHIC_BATCH_BIND),
108    ("holographic_batch_similarity", HOLOGRAPHIC_BATCH_SIMILARITY),
109    ("holographic_bundle_all", HOLOGRAPHIC_BUNDLE_ALL),
110    ("holographic_resonator_step", HOLOGRAPHIC_RESONATOR_STEP),
111];
112
113// =====================================================================
114// TROPICAL ALGEBRA SHADERS
115// =====================================================================
116
117/// Tropical (max-plus) matrix multiplication: C = A ⊗ B where ⊗ is tropical product
118const TROPICAL_MATRIX_MULTIPLY: &str = r#"
119@group(0) @binding(0) var<storage, read> matrix_a: array<f32>;
120@group(0) @binding(1) var<storage, read> matrix_b: array<f32>;
121@group(0) @binding(2) var<storage, read_write> result: array<f32>;
122@group(0) @binding(3) var<storage, read> dimensions: array<u32>; // [M, N, K]
123
124@compute @workgroup_size(16, 16)
125fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
126    let M = dimensions[0];
127    let N = dimensions[1];
128    let K = dimensions[2];
129
130    let row = global_id.x;
131    let col = global_id.y;
132
133    if (row >= M || col >= N) {
134        return;
135    }
136
137    // Tropical matrix multiplication: (A ⊗ B)[i,j] = max_k(A[i,k] + B[k,j])
138    var max_val = -3.4028235e+38; // -infinity in tropical algebra
139
140    for (var k = 0u; k < K; k = k + 1u) {
141        let a_val = matrix_a[row * K + k];
142        let b_val = matrix_b[k * N + col];
143
144        // Tropical multiplication: a ⊗ b = a + b
145        let tropical_product = a_val + b_val;
146
147        // Tropical addition: max operation
148        if (tropical_product > max_val) {
149            max_val = tropical_product;
150        }
151    }
152
153    result[row * N + col] = max_val;
154}
155"#;
156
157/// Tropical vector addition: c = a ⊕ b where ⊕ is max operation
158const TROPICAL_VECTOR_ADD: &str = r#"
159@group(0) @binding(0) var<storage, read> vector_a: array<f32>;
160@group(0) @binding(1) var<storage, read> vector_b: array<f32>;
161@group(0) @binding(2) var<storage, read_write> result: array<f32>;
162
163@compute @workgroup_size(256)
164fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
165    let idx = global_id.x;
166
167    if (idx >= arrayLength(&vector_a)) {
168        return;
169    }
170
171    // Tropical addition: a ⊕ b = max(a, b)
172    result[idx] = max(vector_a[idx], vector_b[idx]);
173}
174"#;
175
176/// Tropical neural network layer computation
177const TROPICAL_NEURAL_NETWORK: &str = r#"
178@group(0) @binding(0) var<storage, read> input: array<f32>;
179@group(0) @binding(1) var<storage, read> weights: array<f32>;
180@group(0) @binding(2) var<storage, read> bias: array<f32>;
181@group(0) @binding(3) var<storage, read_write> output: array<f32>;
182@group(0) @binding(4) var<storage, read> dimensions: array<u32>; // [batch_size, input_size, output_size]
183
184@compute @workgroup_size(16, 16)
185fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
186    let batch_idx = global_id.x;
187    let output_idx = global_id.y;
188
189    let batch_size = dimensions[0];
190    let input_size = dimensions[1];
191    let output_size = dimensions[2];
192
193    if (batch_idx >= batch_size || output_idx >= output_size) {
194        return;
195    }
196
197    // Tropical neural network: max-plus linear transformation
198    var max_val = -3.4028235e+38; // -infinity
199
200    for (var i = 0u; i < input_size; i = i + 1u) {
201        let input_val = input[batch_idx * input_size + i];
202        let weight_val = weights[i * output_size + output_idx];
203
204        // Tropical multiplication: input ⊗ weight = input + weight
205        let product = input_val + weight_val;
206
207        // Tropical addition: max operation
208        if (product > max_val) {
209            max_val = product;
210        }
211    }
212
213    // Add bias (tropical addition = max)
214    let bias_val = bias[output_idx];
215    let final_result = max(max_val, bias_val);
216
217    output[batch_idx * output_size + output_idx] = final_result;
218}
219"#;
220
221// =====================================================================
222// DUAL NUMBER SHADERS (AUTOMATIC DIFFERENTIATION)
223// =====================================================================
224
225/// Forward-mode automatic differentiation for dual numbers
226const DUAL_FORWARD_AD: &str = r#"
227struct DualNumber {
228    real: f32,
229    dual: f32, // derivative part
230}
231
232@group(0) @binding(0) var<storage, read> input_dual: array<DualNumber>;
233@group(0) @binding(1) var<storage, read> operation_params: array<f32>;
234@group(0) @binding(2) var<storage, read_write> output_dual: array<DualNumber>;
235
236@compute @workgroup_size(256)
237fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
238    let idx = global_id.x;
239
240    if (idx >= arrayLength(&input_dual)) {
241        return;
242    }
243
244    let x = input_dual[idx];
245    let op_type = u32(operation_params[0]); // Operation type
246
247    var result: DualNumber;
248
249    // Forward-mode AD for different operations
250    switch (op_type) {
251        case 0u: { // sin(x): (sin(x), cos(x) * dx)
252            result.real = sin(x.real);
253            result.dual = cos(x.real) * x.dual;
254        }
255        case 1u: { // exp(x): (exp(x), exp(x) * dx)
256            let exp_val = exp(x.real);
257            result.real = exp_val;
258            result.dual = exp_val * x.dual;
259        }
260        case 2u: { // x^2: (x^2, 2x * dx)
261            result.real = x.real * x.real;
262            result.dual = 2.0 * x.real * x.dual;
263        }
264        case 3u: { // log(x): (log(x), (1/x) * dx)
265            result.real = log(x.real);
266            result.dual = x.dual / x.real;
267        }
268        default: { // identity
269            result = x;
270        }
271    }
272
273    output_dual[idx] = result;
274}
275"#;
276
277/// Batch gradient computation for multiple functions
278const DUAL_BATCH_GRADIENT: &str = r#"
279struct DualNumber {
280    real: f32,
281    dual: f32,
282}
283
284@group(0) @binding(0) var<storage, read> input_batch: array<DualNumber>;
285@group(0) @binding(1) var<storage, read> function_params: array<f32>;
286@group(0) @binding(2) var<storage, read_write> gradients: array<f32>;
287@group(0) @binding(3) var<storage, read> batch_info: array<u32>; // [batch_size, function_dim]
288
289@compute @workgroup_size(16, 16)
290fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
291    let batch_idx = global_id.x;
292    let var_idx = global_id.y;
293
294    let batch_size = batch_info[0];
295    let function_dim = batch_info[1];
296
297    if (batch_idx >= batch_size || var_idx >= function_dim) {
298        return;
299    }
300
301    let input_idx = batch_idx * function_dim + var_idx;
302    let x = input_batch[input_idx];
303
304    // Compute gradient of composite function f(g(x)) where g is parameterized
305    let param_idx = var_idx % 4u; // Assume up to 4 parameters per function
306    let param = function_params[param_idx];
307
308    // Example: f(x) = param * x^2 + sin(x), gradient = 2 * param * x + cos(x)
309    let gradient = 2.0 * param * x.real + cos(x.real);
310
311    gradients[input_idx] = gradient * x.dual;
312}
313"#;
314
315/// Chain rule implementation for complex function compositions
316const DUAL_CHAIN_RULE: &str = r#"
317struct DualNumber {
318    real: f32,
319    dual: f32,
320}
321
322@group(0) @binding(0) var<storage, read> inner_function: array<DualNumber>;
323@group(0) @binding(1) var<storage, read> outer_params: array<f32>;
324@group(0) @binding(2) var<storage, read_write> composed_result: array<DualNumber>;
325
326@compute @workgroup_size(256)
327fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
328    let idx = global_id.x;
329
330    if (idx >= arrayLength(&inner_function)) {
331        return;
332    }
333
334    let u = inner_function[idx]; // u = g(x), du/dx
335    let outer_type = u32(outer_params[0]);
336
337    var result: DualNumber;
338
339    // Chain rule: d/dx[f(g(x))] = f'(g(x)) * g'(x) = f'(u) * du/dx
340    switch (outer_type) {
341        case 0u: { // f(u) = sin(u)
342            result.real = sin(u.real);
343            result.dual = cos(u.real) * u.dual; // cos(u) * du/dx
344        }
345        case 1u: { // f(u) = u^3
346            result.real = u.real * u.real * u.real;
347            result.dual = 3.0 * u.real * u.real * u.dual; // 3u^2 * du/dx
348        }
349        case 2u: { // f(u) = exp(u)
350            let exp_u = exp(u.real);
351            result.real = exp_u;
352            result.dual = exp_u * u.dual; // exp(u) * du/dx
353        }
354        default: { // f(u) = u (identity)
355            result = u;
356        }
357    }
358
359    composed_result[idx] = result;
360}
361"#;
362
363// =====================================================================
364// FUSION SYSTEM SHADERS
365// =====================================================================
366
367/// TropicalDualClifford operations for LLM evaluation
368const TROPICAL_DUAL_CLIFFORD: &str = r#"
369struct TropicalNumber {
370    value: f32, // Tropical number value
371}
372
373struct DualNumber {
374    real: f32,
375    dual: f32,
376}
377
378struct Multivector {
379    coeffs: array<f32, 8>, // 3D Clifford algebra: 8 basis elements
380}
381
382struct TropicalDualClifford {
383    tropical: TropicalNumber,
384    dual: DualNumber,
385    clifford: Multivector,
386}
387
388@group(0) @binding(0) var<storage, read> input_batch: array<TropicalDualClifford>;
389@group(0) @binding(1) var<storage, read> operation_params: array<f32>;
390@group(0) @binding(2) var<storage, read_write> output_batch: array<TropicalDualClifford>;
391
392@compute @workgroup_size(64)
393fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
394    let idx = global_id.x;
395
396    if (idx >= arrayLength(&input_batch)) {
397        return;
398    }
399
400    let tdc = input_batch[idx];
401    let op_type = u32(operation_params[0]);
402
403    var result: TropicalDualClifford;
404
405    switch (op_type) {
406        case 0u: { // LLM attention computation
407            // Combine tropical path selection with dual gradients and geometric transformations
408            result.tropical.value = max(tdc.tropical.value, operation_params[1]);
409            result.dual.real = tdc.dual.real * operation_params[2];
410            result.dual.dual = tdc.dual.dual * operation_params[2];
411
412            // Geometric rotation in Clifford algebra
413            let angle = operation_params[3];
414            let cos_half = cos(angle * 0.5);
415            let sin_half = sin(angle * 0.5);
416
417            // Simple rotation around e12 plane
418            result.clifford.coeffs[0] = cos_half * tdc.clifford.coeffs[0]; // scalar
419            result.clifford.coeffs[1] = tdc.clifford.coeffs[1]; // e1
420            result.clifford.coeffs[2] = tdc.clifford.coeffs[2]; // e2
421            result.clifford.coeffs[3] = tdc.clifford.coeffs[3]; // e3
422            result.clifford.coeffs[4] = sin_half * tdc.clifford.coeffs[0]; // e12
423            result.clifford.coeffs[5] = tdc.clifford.coeffs[5]; // e13
424            result.clifford.coeffs[6] = tdc.clifford.coeffs[6]; // e23
425            result.clifford.coeffs[7] = tdc.clifford.coeffs[7]; // e123
426        }
427        default: {
428            result = tdc;
429        }
430    }
431
432    output_batch[idx] = result;
433}
434"#;
435
436/// Fusion attention mechanism using tropical algebra
437const FUSION_ATTENTION: &str = r#"
438@group(0) @binding(0) var<storage, read> queries: array<f32>;
439@group(0) @binding(1) var<storage, read> keys: array<f32>;
440@group(0) @binding(2) var<storage, read> values: array<f32>;
441@group(0) @binding(3) var<storage, read_write> attention_output: array<f32>;
442@group(0) @binding(4) var<storage, read> dimensions: array<u32>; // [seq_len, d_model]
443
444@compute @workgroup_size(16, 16)
445fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
446    let seq_pos = global_id.x;
447    let feature_idx = global_id.y;
448
449    let seq_len = dimensions[0];
450    let d_model = dimensions[1];
451
452    if (seq_pos >= seq_len || feature_idx >= d_model) {
453        return;
454    }
455
456    // Tropical attention: use max-plus algebra instead of softmax
457    var max_score = -3.4028235e+38; // -infinity
458    var best_key_idx = 0u;
459
460    // Find the key with maximum tropical attention score
461    for (var key_idx = 0u; key_idx < seq_len; key_idx = key_idx + 1u) {
462        var score = -3.4028235e+38;
463
464        // Compute tropical dot product: sum becomes max, product becomes sum
465        for (var d = 0u; d < d_model; d = d + 1u) {
466            let q = queries[seq_pos * d_model + d];
467            let k = keys[key_idx * d_model + d];
468
469            // Tropical multiplication: q ⊗ k = q + k
470            let tropical_product = q + k;
471
472            // Tropical sum: max operation
473            if (tropical_product > score) {
474                score = tropical_product;
475            }
476        }
477
478        if (score > max_score) {
479            max_score = score;
480            best_key_idx = key_idx;
481        }
482    }
483
484    // Tropical attention: select value from best key (winner-takes-all)
485    attention_output[seq_pos * d_model + feature_idx] =
486        values[best_key_idx * d_model + feature_idx];
487}
488"#;
489
490// =====================================================================
491// HOLOGRAPHIC MEMORY SHADERS
492// =====================================================================
493
494/// Batch binding operation for holographic memory
495/// Computes key ⊛ value for multiple pairs using Clifford geometric product
496pub const HOLOGRAPHIC_BATCH_BIND: &str = r#"
497// TropicalDualClifford representation for GPU
498// We use a simplified 8-dimensional Clifford representation
499struct TDC {
500    // Tropical component (max element)
501    tropical: f32,
502    // Dual component (real and dual parts)
503    dual_real: f32,
504    dual_dual: f32,
505    // Clifford algebra coefficients (8D: scalar, 3 vectors, 3 bivectors, pseudoscalar)
506    clifford: array<f32, 8>,
507    // Padding for alignment
508    _padding: array<f32, 5>,
509}
510
511@group(0) @binding(0) var<storage, read> keys: array<TDC>;
512@group(0) @binding(1) var<storage, read> values: array<TDC>;
513@group(0) @binding(2) var<storage, read_write> results: array<TDC>;
514@group(0) @binding(3) var<uniform> params: array<u32, 4>; // [count, 0, 0, 0]
515
516// Cayley table for 3D Clifford algebra Cl(3,0)
517// Product signs: e_i * e_j where i,j are grade indices
518fn cayley_sign(i: u32, j: u32) -> f32 {
519    // Simplified: for vectors e_i * e_i = 1, e_i * e_j = -e_j * e_i for i != j
520    let signs = array<array<f32, 8>, 8>(
521        // 1    e1   e2   e3   e12  e13  e23  e123
522        array<f32, 8>(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0),   // 1
523        array<f32, 8>(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -1.0, 1.0),  // e1
524        array<f32, 8>(1.0, -1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0), // e2
525        array<f32, 8>(1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0), // e3
526        array<f32, 8>(1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, 1.0), // e12
527        array<f32, 8>(1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, 1.0), // e13
528        array<f32, 8>(1.0, 1.0, -1.0, 1.0, -1.0, -1.0, -1.0, 1.0), // e23
529        array<f32, 8>(1.0, 1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0), // e123
530    );
531    return signs[i][j];
532}
533
534// Result index for e_i * e_j
535fn cayley_index(i: u32, j: u32) -> u32 {
536    let indices = array<array<u32, 8>, 8>(
537        // 1    e1   e2   e3   e12  e13  e23  e123
538        array<u32, 8>(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u),   // 1
539        array<u32, 8>(1u, 0u, 4u, 5u, 2u, 3u, 7u, 6u),   // e1
540        array<u32, 8>(2u, 4u, 0u, 6u, 1u, 7u, 3u, 5u),   // e2
541        array<u32, 8>(3u, 5u, 6u, 0u, 7u, 1u, 2u, 4u),   // e3
542        array<u32, 8>(4u, 2u, 1u, 7u, 0u, 6u, 5u, 3u),   // e12
543        array<u32, 8>(5u, 3u, 7u, 1u, 6u, 0u, 4u, 2u),   // e13
544        array<u32, 8>(6u, 7u, 3u, 2u, 5u, 4u, 0u, 1u),   // e23
545        array<u32, 8>(7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u),   // e123
546    );
547    return indices[i][j];
548}
549
550@compute @workgroup_size(64)
551fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
552    let idx = global_id.x;
553    let count = params[0];
554
555    if (idx >= count) {
556        return;
557    }
558
559    let key = keys[idx];
560    let value = values[idx];
561
562    var result: TDC;
563
564    // Binding uses geometric product on Clifford components
565    // result = key * value (geometric product)
566    for (var i = 0u; i < 8u; i = i + 1u) {
567        result.clifford[i] = 0.0;
568    }
569
570    for (var i = 0u; i < 8u; i = i + 1u) {
571        for (var j = 0u; j < 8u; j = j + 1u) {
572            let target = cayley_index(i, j);
573            let sign = cayley_sign(i, j);
574            result.clifford[target] += sign * key.clifford[i] * value.clifford[j];
575        }
576    }
577
578    // Tropical: max of both (binding produces new tropical value)
579    result.tropical = max(key.tropical, value.tropical);
580
581    // Dual: product rule for dual numbers
582    result.dual_real = key.dual_real * value.dual_real;
583    result.dual_dual = key.dual_real * value.dual_dual + key.dual_dual * value.dual_real;
584
585    results[idx] = result;
586}
587"#;
588
589/// Batch similarity computation for holographic vectors
590/// Computes pairwise similarities using inner product with reverse: <A B̃>₀
591pub const HOLOGRAPHIC_BATCH_SIMILARITY: &str = r#"
592struct TDC {
593    tropical: f32,
594    dual_real: f32,
595    dual_dual: f32,
596    clifford: array<f32, 8>,
597    _padding: array<f32, 5>,
598}
599
600@group(0) @binding(0) var<storage, read> vectors_a: array<TDC>;
601@group(0) @binding(1) var<storage, read> vectors_b: array<TDC>;
602@group(0) @binding(2) var<storage, read_write> similarities: array<f32>;
603@group(0) @binding(3) var<uniform> params: array<u32, 4>; // [count_a, count_b, mode, 0]
604                                                          // mode: 0=pairwise (a[i] vs b[i]), 1=matrix (all pairs)
605
606// Compute reverse of multivector (flip sign of grades 2 and 3)
607fn reverse_sign(grade: u32) -> f32 {
608    // Grade 0: +1, Grade 1: +1, Grade 2: -1, Grade 3: -1
609    // For Cl(3,0): indices 0=scalar(g0), 1-3=vectors(g1), 4-6=bivectors(g2), 7=trivector(g3)
610    let signs = array<f32, 8>(1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0);
611    return signs[grade];
612}
613
614// Compute scalar product <A B̃>₀ - the proper inner product for similarity
615fn scalar_product_with_reverse(a: TDC, b: TDC) -> f32 {
616    var result = 0.0;
617
618    // For each basis element, compute contribution to scalar part
619    // Using simplified formula: sum of a[i] * b[i] * reverse_sign(i) * cayley_contribution_to_scalar
620    // For diagonal elements (same basis): e_i * e_i contributes to scalar
621    for (var i = 0u; i < 8u; i = i + 1u) {
622        result += a.clifford[i] * b.clifford[i] * reverse_sign(i);
623    }
624
625    return result;
626}
627
628fn norm(v: TDC) -> f32 {
629    var sum = 0.0;
630    for (var i = 0u; i < 8u; i = i + 1u) {
631        sum += v.clifford[i] * v.clifford[i];
632    }
633    return sqrt(sum);
634}
635
636@compute @workgroup_size(256)
637fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
638    let idx = global_id.x;
639    let count_a = params[0];
640    let count_b = params[1];
641    let mode = params[2];
642
643    if (mode == 0u) {
644        // Pairwise mode: similarities[i] = sim(a[i], b[i])
645        if (idx >= count_a) {
646            return;
647        }
648
649        let a = vectors_a[idx];
650        let b = vectors_b[idx];
651
652        let norm_a = norm(a);
653        let norm_b = norm(b);
654
655        if (norm_a < 1e-10 || norm_b < 1e-10) {
656            similarities[idx] = 0.0;
657            return;
658        }
659
660        let inner = scalar_product_with_reverse(a, b);
661        similarities[idx] = inner / (norm_a * norm_b);
662    } else {
663        // Matrix mode: similarities[i * count_b + j] = sim(a[i], b[j])
664        let total = count_a * count_b;
665        if (idx >= total) {
666            return;
667        }
668
669        let i = idx / count_b;
670        let j = idx % count_b;
671
672        let a = vectors_a[i];
673        let b = vectors_b[j];
674
675        let norm_a = norm(a);
676        let norm_b = norm(b);
677
678        if (norm_a < 1e-10 || norm_b < 1e-10) {
679            similarities[idx] = 0.0;
680            return;
681        }
682
683        let inner = scalar_product_with_reverse(a, b);
684        similarities[idx] = inner / (norm_a * norm_b);
685    }
686}
687"#;
688
689/// Bundle all vectors into a superposition (weighted average)
690pub const HOLOGRAPHIC_BUNDLE_ALL: &str = r#"
691struct TDC {
692    tropical: f32,
693    dual_real: f32,
694    dual_dual: f32,
695    clifford: array<f32, 8>,
696    _padding: array<f32, 5>,
697}
698
699@group(0) @binding(0) var<storage, read> vectors: array<TDC>;
700@group(0) @binding(1) var<storage, read_write> result: array<TDC>; // Single output
701@group(0) @binding(2) var<uniform> params: vec4<f32>; // [count, beta, normalize, 0]
702
703// Workgroup shared memory for parallel reduction
704var<workgroup> shared_clifford: array<array<f32, 8>, 64>;
705var<workgroup> shared_tropical: array<f32, 64>;
706var<workgroup> shared_dual_real: array<f32, 64>;
707var<workgroup> shared_dual_dual: array<f32, 64>;
708
709@compute @workgroup_size(64)
710fn main(
711    @builtin(global_invocation_id) global_id: vec3<u32>,
712    @builtin(local_invocation_id) local_id: vec3<u32>,
713    @builtin(workgroup_id) workgroup_id: vec3<u32>
714) {
715    let idx = global_id.x;
716    let local_idx = local_id.x;
717    let count = u32(params.x);
718    let beta = params.y;
719    let do_normalize = params.z > 0.5;
720
721    // Initialize shared memory
722    for (var i = 0u; i < 8u; i = i + 1u) {
723        shared_clifford[local_idx][i] = 0.0;
724    }
725    shared_tropical[local_idx] = -3.4028235e+38; // -inf for tropical
726    shared_dual_real[local_idx] = 0.0;
727    shared_dual_dual[local_idx] = 0.0;
728
729    // Load data into shared memory
730    if (idx < count) {
731        let v = vectors[idx];
732        for (var i = 0u; i < 8u; i = i + 1u) {
733            shared_clifford[local_idx][i] = v.clifford[i];
734        }
735        shared_tropical[local_idx] = v.tropical;
736        shared_dual_real[local_idx] = v.dual_real;
737        shared_dual_dual[local_idx] = v.dual_dual;
738    }
739
740    workgroupBarrier();
741
742    // Parallel reduction
743    for (var stride = 32u; stride > 0u; stride = stride / 2u) {
744        if (local_idx < stride && local_idx + stride < 64u) {
745            // Bundle Clifford components (sum/average)
746            for (var i = 0u; i < 8u; i = i + 1u) {
747                shared_clifford[local_idx][i] += shared_clifford[local_idx + stride][i];
748            }
749            // Tropical: take max
750            shared_tropical[local_idx] = max(shared_tropical[local_idx], shared_tropical[local_idx + stride]);
751            // Dual: sum
752            shared_dual_real[local_idx] += shared_dual_real[local_idx + stride];
753            shared_dual_dual[local_idx] += shared_dual_dual[local_idx + stride];
754        }
755        workgroupBarrier();
756    }
757
758    // Thread 0 writes result
759    if (local_idx == 0u) {
760        var final_result: TDC;
761
762        // Average the Clifford components
763        let scale = 1.0 / f32(count);
764        for (var i = 0u; i < 8u; i = i + 1u) {
765            final_result.clifford[i] = shared_clifford[0][i] * scale;
766        }
767
768        final_result.tropical = shared_tropical[0];
769        final_result.dual_real = shared_dual_real[0] * scale;
770        final_result.dual_dual = shared_dual_dual[0] * scale;
771
772        // Optionally normalize
773        if (do_normalize) {
774            var norm_sq = 0.0;
775            for (var i = 0u; i < 8u; i = i + 1u) {
776                norm_sq += final_result.clifford[i] * final_result.clifford[i];
777            }
778            let norm = sqrt(norm_sq);
779            if (norm > 1e-10) {
780                let inv_norm = 1.0 / norm;
781                for (var i = 0u; i < 8u; i = i + 1u) {
782                    final_result.clifford[i] *= inv_norm;
783                }
784            }
785        }
786
787        result[workgroup_id.x] = final_result;
788    }
789}
790"#;
791
792/// Resonator cleanup step - computes similarities against codebook
793pub const HOLOGRAPHIC_RESONATOR_STEP: &str = r#"
794struct TDC {
795    tropical: f32,
796    dual_real: f32,
797    dual_dual: f32,
798    clifford: array<f32, 8>,
799    _padding: array<f32, 5>,
800}
801
802struct ResonatorOutput {
803    cleaned: TDC,
804    best_index: u32,
805    best_similarity: f32,
806    _padding: array<f32, 2>,
807}
808
809@group(0) @binding(0) var<storage, read> input: TDC;
810@group(0) @binding(1) var<storage, read> codebook: array<TDC>;
811@group(0) @binding(2) var<storage, read_write> output: ResonatorOutput;
812@group(0) @binding(3) var<uniform> params: array<u32, 4>; // [codebook_size, max_iterations, 0, 0]
813
814fn reverse_sign(grade: u32) -> f32 {
815    let signs = array<f32, 8>(1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0);
816    return signs[grade];
817}
818
819fn scalar_product_with_reverse(a: TDC, b: TDC) -> f32 {
820    var result = 0.0;
821    for (var i = 0u; i < 8u; i = i + 1u) {
822        result += a.clifford[i] * b.clifford[i] * reverse_sign(i);
823    }
824    return result;
825}
826
827fn norm(v: TDC) -> f32 {
828    var sum = 0.0;
829    for (var i = 0u; i < 8u; i = i + 1u) {
830        sum += v.clifford[i] * v.clifford[i];
831    }
832    return sqrt(sum);
833}
834
835fn similarity(a: TDC, b: TDC) -> f32 {
836    let norm_a = norm(a);
837    let norm_b = norm(b);
838    if (norm_a < 1e-10 || norm_b < 1e-10) {
839        return 0.0;
840    }
841    return scalar_product_with_reverse(a, b) / (norm_a * norm_b);
842}
843
844// Workgroup shared memory for parallel max finding
845var<workgroup> shared_best_sim: array<f32, 256>;
846var<workgroup> shared_best_idx: array<u32, 256>;
847
848@compute @workgroup_size(256)
849fn main(
850    @builtin(global_invocation_id) global_id: vec3<u32>,
851    @builtin(local_invocation_id) local_id: vec3<u32>
852) {
853    let idx = global_id.x;
854    let local_idx = local_id.x;
855    let codebook_size = params[0];
856
857    // Initialize
858    shared_best_sim[local_idx] = -2.0; // Below minimum similarity
859    shared_best_idx[local_idx] = 0u;
860
861    // Each thread computes similarity for one codebook entry
862    if (idx < codebook_size) {
863        let sim = similarity(input, codebook[idx]);
864        shared_best_sim[local_idx] = sim;
865        shared_best_idx[local_idx] = idx;
866    }
867
868    workgroupBarrier();
869
870    // Parallel reduction to find max
871    for (var stride = 128u; stride > 0u; stride = stride / 2u) {
872        if (local_idx < stride && local_idx + stride < 256u) {
873            if (shared_best_sim[local_idx + stride] > shared_best_sim[local_idx]) {
874                shared_best_sim[local_idx] = shared_best_sim[local_idx + stride];
875                shared_best_idx[local_idx] = shared_best_idx[local_idx + stride];
876            }
877        }
878        workgroupBarrier();
879    }
880
881    // Thread 0 writes result
882    if (local_idx == 0u) {
883        let best_idx = shared_best_idx[0];
884        output.cleaned = codebook[best_idx];
885        output.best_index = best_idx;
886        output.best_similarity = shared_best_sim[0];
887    }
888}
889"#;
890
891// =====================================================================
892// INFORMATION GEOMETRY SHADERS
893// =====================================================================
894
895/// Fisher information matrix computation
896const FISHER_INFORMATION: &str = r#"
897@group(0) @binding(0) var<storage, read> probability_params: array<f32>;
898@group(0) @binding(1) var<storage, read> data_points: array<f32>;
899@group(0) @binding(2) var<storage, read_write> fisher_matrix: array<f32>;
900@group(0) @binding(3) var<storage, read> dimensions: array<u32>; // [n_params, n_data]
901
902@compute @workgroup_size(16, 16)
903fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
904    let param_i = global_id.x;
905    let param_j = global_id.y;
906
907    let n_params = dimensions[0];
908    let n_data = dimensions[1];
909
910    if (param_i >= n_params || param_j >= n_params) {
911        return;
912    }
913
914    // Fisher Information Matrix: I[i,j] = E[∂²log p(x|θ)/∂θᵢ∂θⱼ]
915    var fisher_element = 0.0;
916
917    for (var data_idx = 0u; data_idx < n_data; data_idx = data_idx + 1u) {
918        let x = data_points[data_idx];
919
920        // Gaussian log-likelihood example: log p(x|μ,σ) = -½log(2πσ²) - (x-μ)²/(2σ²)
921        let mu = probability_params[0];
922        let sigma = probability_params[1];
923        let sigma_sq = sigma * sigma;
924
925        var d2_log_p = 0.0;
926
927        if (param_i == 0u && param_j == 0u) { // ∂²/∂μ²
928            d2_log_p = -1.0 / sigma_sq;
929        } else if (param_i == 1u && param_j == 1u) { // ∂²/∂σ²
930            let diff = x - mu;
931            d2_log_p = -1.0 / sigma_sq + 3.0 * diff * diff / (sigma_sq * sigma_sq);
932        } else if ((param_i == 0u && param_j == 1u) || (param_i == 1u && param_j == 0u)) { // ∂²/∂μ∂σ
933            let diff = x - mu;
934            d2_log_p = 2.0 * diff / (sigma_sq * sigma);
935        }
936
937        fisher_element += -d2_log_p; // Fisher = -E[Hessian of log-likelihood]
938    }
939
940    fisher_matrix[param_i * n_params + param_j] = fisher_element / f32(n_data);
941}
942"#;
943
944/// Batch KL divergence computation
945const KL_DIVERGENCE_BATCH: &str = r#"
946@group(0) @binding(0) var<storage, read> distribution_p: array<f32>;
947@group(0) @binding(1) var<storage, read> distribution_q: array<f32>;
948@group(0) @binding(2) var<storage, read_write> kl_divergences: array<f32>;
949@group(0) @binding(3) var<storage, read> batch_info: array<u32>; // [batch_size, dist_size]
950
951@compute @workgroup_size(256)
952fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
953    let batch_idx = global_id.x;
954    let batch_size = batch_info[0];
955    let dist_size = batch_info[1];
956
957    if (batch_idx >= batch_size) {
958        return;
959    }
960
961    // KL divergence: D_KL(P||Q) = Σ P(x) log(P(x)/Q(x))
962    var kl_div = 0.0;
963
964    for (var i = 0u; i < dist_size; i = i + 1u) {
965        let p_i = distribution_p[batch_idx * dist_size + i];
966        let q_i = distribution_q[batch_idx * dist_size + i];
967
968        if (p_i > 1e-10 && q_i > 1e-10) { // Avoid log(0)
969            kl_div += p_i * log(p_i / q_i);
970        }
971    }
972
973    kl_divergences[batch_idx] = kl_div;
974}
975"#;
976
977// =====================================================================
978// CELLULAR AUTOMATA SHADERS
979// =====================================================================
980
981/// Cellular automata evolution step
982pub const CA_EVOLUTION: &str = r#"
983@group(0) @binding(0) var<storage, read> current_state: array<u32>;
984@group(0) @binding(1) var<storage, read_write> next_state: array<u32>;
985@group(0) @binding(2) var<storage, read> rules: array<u32>;
986@group(0) @binding(3) var<storage, read> dimensions: array<u32>; // [width, height]
987
988@compute @workgroup_size(16, 16)
989fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
990    let x = global_id.x;
991    let y = global_id.y;
992
993    let width = dimensions[0];
994    let height = dimensions[1];
995
996    if (x >= width || y >= height) {
997        return;
998    }
999
1000    let idx = y * width + x;
1001    let current_cell = current_state[idx];
1002
1003    // Count alive neighbors (Moore neighborhood)
1004    var alive_neighbors = 0u;
1005
1006    for (var dy = 0u; dy < 3u; dy = dy + 1u) {
1007        for (var dx = 0u; dx < 3u; dx = dx + 1u) {
1008            if (dx == 1u && dy == 1u) { continue; } // Skip center cell
1009
1010            let nx = (x + dx + width - 1u) % width; // Wrap around
1011            let ny = (y + dy + height - 1u) % height;
1012            let neighbor_idx = ny * width + nx;
1013
1014            if (current_state[neighbor_idx] == 1u) {
1015                alive_neighbors = alive_neighbors + 1u;
1016            }
1017        }
1018    }
1019
1020    // Conway's Game of Life rules (can be customized via rules buffer)
1021    var new_state = 0u;
1022
1023    if (current_cell == 1u) { // Currently alive
1024        if (alive_neighbors == 2u || alive_neighbors == 3u) {
1025            new_state = 1u; // Survive
1026        }
1027    } else { // Currently dead
1028        if (alive_neighbors == 3u) {
1029            new_state = 1u; // Birth
1030        }
1031    }
1032
1033    next_state[idx] = new_state;
1034}
1035"#;
1036
1037/// Rule application for geometric algebra cellular automata
1038pub const RULE_APPLICATION: &str = r#"
1039struct GpuCellData {
1040    scalar: f32,
1041    e1: f32,
1042    e2: f32,
1043    e3: f32,
1044    e12: f32,
1045    e13: f32,
1046    e23: f32,
1047    e123: f32,
1048    generation: f32,
1049    neighborhood_size: f32,
1050    rule_type: f32,
1051    boundary_condition: f32,
1052    padding: array<f32, 4>,
1053}
1054
1055struct GpuRuleConfig {
1056    rule_type: f32,
1057    threshold: f32,
1058    damping_factor: f32,
1059    energy_conservation: f32,
1060    time_step: f32,
1061    spatial_scale: f32,
1062    geometric_weight: f32,
1063    nonlinear_factor: f32,
1064    boundary_type: f32,
1065    neighborhood_radius: f32,
1066    evolution_speed: f32,
1067    stability_factor: f32,
1068    padding: array<f32, 4>,
1069}
1070
1071@group(0) @binding(0) var<storage, read> cells: array<GpuCellData>;
1072@group(0) @binding(1) var<storage, read> rules: array<GpuRuleConfig>;
1073@group(0) @binding(2) var<storage, read_write> output: array<GpuCellData>;
1074
1075@compute @workgroup_size(256)
1076fn rule_application_main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1077    let idx = global_id.x;
1078
1079    if (idx >= arrayLength(&cells)) {
1080        return;
1081    }
1082
1083    let cell = cells[idx];
1084    let rule = rules[0]; // Use first rule for now
1085
1086    var new_cell = cell;
1087
1088    // Apply damping factor
1089    new_cell.scalar = cell.scalar * (1.0 - rule.damping_factor);
1090    new_cell.e1 = cell.e1 * (1.0 - rule.damping_factor);
1091    new_cell.e2 = cell.e2 * (1.0 - rule.damping_factor);
1092    new_cell.e3 = cell.e3 * (1.0 - rule.damping_factor);
1093    new_cell.e12 = cell.e12 * (1.0 - rule.damping_factor);
1094    new_cell.e13 = cell.e13 * (1.0 - rule.damping_factor);
1095    new_cell.e23 = cell.e23 * (1.0 - rule.damping_factor);
1096    new_cell.e123 = cell.e123 * (1.0 - rule.damping_factor);
1097
1098    // Apply threshold
1099    if (abs(new_cell.scalar) < rule.threshold) {
1100        new_cell.scalar = 0.0;
1101    }
1102
1103    output[idx] = new_cell;
1104}
1105"#;
1106
1107/// Energy calculation for cellular automata
1108pub const ENERGY_CALCULATION: &str = r#"
1109struct GpuCellData {
1110    scalar: f32,
1111    e1: f32,
1112    e2: f32,
1113    e3: f32,
1114    e12: f32,
1115    e13: f32,
1116    e23: f32,
1117    e123: f32,
1118    generation: f32,
1119    neighborhood_size: f32,
1120    rule_type: f32,
1121    boundary_condition: f32,
1122    padding: array<f32, 4>,
1123}
1124
1125@group(0) @binding(0) var<storage, read> cells: array<GpuCellData>;
1126@group(0) @binding(1) var<storage, read_write> total_energy: array<f32>;
1127
1128@compute @workgroup_size(1)
1129fn energy_calculation_main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1130    var energy = 0.0;
1131
1132    // Sum the squared magnitudes of all multivector components
1133    for (var i = 0u; i < arrayLength(&cells); i = i + 1u) {
1134        let cell = cells[i];
1135        energy += cell.scalar * cell.scalar;
1136        energy += cell.e1 * cell.e1;
1137        energy += cell.e2 * cell.e2;
1138        energy += cell.e3 * cell.e3;
1139        energy += cell.e12 * cell.e12;
1140        energy += cell.e13 * cell.e13;
1141        energy += cell.e23 * cell.e23;
1142        energy += cell.e123 * cell.e123;
1143    }
1144
1145    total_energy[0] = energy;
1146}
1147"#;
1148
1149/// Neighbor extraction for cellular automata
1150pub const NEIGHBOR_EXTRACTION: &str = r#"
1151struct GpuCellData {
1152    scalar: f32,
1153    e1: f32,
1154    e2: f32,
1155    e3: f32,
1156    e12: f32,
1157    e13: f32,
1158    e23: f32,
1159    e123: f32,
1160    generation: f32,
1161    neighborhood_size: f32,
1162    rule_type: f32,
1163    boundary_condition: f32,
1164    padding: array<f32, 4>,
1165}
1166
1167@group(0) @binding(0) var<storage, read> cells: array<GpuCellData>;
1168@group(0) @binding(1) var<uniform> params: array<f32, 4>; // [width, height, total_cells, padding]
1169@group(0) @binding(2) var<storage, read_write> neighborhoods: array<GpuCellData>;
1170
1171@compute @workgroup_size(256)
1172fn neighbor_extraction_main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1173    let idx = global_id.x;
1174    let width = u32(params[0]);
1175    let height = u32(params[1]);
1176    let total_cells = u32(params[2]);
1177
1178    if (idx >= total_cells) {
1179        return;
1180    }
1181
1182    // Calculate 2D position from linear index
1183    let x = idx % width;
1184    let y = idx / width;
1185
1186    // Moore neighborhood: 8 neighbors
1187    let offsets = array<vec2<i32>, 8>(
1188        vec2<i32>(-1, -1), vec2<i32>(0, -1), vec2<i32>(1, -1),
1189        vec2<i32>(-1,  0),                   vec2<i32>(1,  0),
1190        vec2<i32>(-1,  1), vec2<i32>(0,  1), vec2<i32>(1,  1)
1191    );
1192
1193    // Extract neighbors with wrapping boundaries
1194    for (var i = 0u; i < 8u; i = i + 1u) {
1195        let offset = offsets[i];
1196        let nx = (i32(x) + offset.x + i32(width)) % i32(width);
1197        let ny = (i32(y) + offset.y + i32(height)) % i32(height);
1198        let neighbor_idx = u32(ny) * width + u32(nx);
1199
1200        // Store neighbor in output array
1201        neighborhoods[idx * 8u + i] = cells[neighbor_idx];
1202    }
1203}
1204"#;
1205
1206/// Self-assembly pattern formation
1207const CA_SELF_ASSEMBLY: &str = r#"
1208@group(0) @binding(0) var<storage, read> particles: array<f32>; // [x, y, type, energy]
1209@group(0) @binding(1) var<storage, read_write> new_particles: array<f32>;
1210@group(0) @binding(2) var<storage, read> assembly_rules: array<f32>;
1211@group(0) @binding(3) var<storage, read> simulation_params: array<u32>; // [n_particles, grid_size]
1212
1213@compute @workgroup_size(64)
1214fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1215    let particle_idx = global_id.x;
1216    let n_particles = simulation_params[0];
1217    let grid_size = simulation_params[1];
1218
1219    if (particle_idx >= n_particles) {
1220        return;
1221    }
1222
1223    let base_idx = particle_idx * 4u;
1224    let x = particles[base_idx];
1225    let y = particles[base_idx + 1u];
1226    let particle_type = particles[base_idx + 2u];
1227    let energy = particles[base_idx + 3u];
1228
1229    // Self-assembly based on local interactions
1230    var new_x = x;
1231    var new_y = y;
1232    var new_energy = energy;
1233
1234    // Calculate forces from nearby particles
1235    var force_x = 0.0;
1236    var force_y = 0.0;
1237
1238    for (var other_idx = 0u; other_idx < n_particles; other_idx = other_idx + 1u) {
1239        if (other_idx == particle_idx) { continue; }
1240
1241        let other_base = other_idx * 4u;
1242        let other_x = particles[other_base];
1243        let other_y = particles[other_base + 1u];
1244        let other_type = particles[other_base + 2u];
1245
1246        let dx = other_x - x;
1247        let dy = other_y - y;
1248        let distance = sqrt(dx * dx + dy * dy);
1249
1250        if (distance < 5.0 && distance > 0.1) { // Interaction range
1251            let interaction_strength = assembly_rules[u32(particle_type) * 4u + u32(other_type)];
1252
1253            // Attractive/repulsive force based on particle types
1254            let force_magnitude = interaction_strength / (distance * distance);
1255            force_x += force_magnitude * dx / distance;
1256            force_y += force_magnitude * dy / distance;
1257        }
1258    }
1259
1260    // Update position based on forces
1261    new_x += force_x * 0.1; // time step
1262    new_y += force_y * 0.1;
1263
1264    // Keep within bounds
1265    new_x = clamp(new_x, 0.0, f32(grid_size));
1266    new_y = clamp(new_y, 0.0, f32(grid_size));
1267
1268    // Energy dissipation
1269    new_energy = energy * 0.99;
1270
1271    new_particles[base_idx] = new_x;
1272    new_particles[base_idx + 1u] = new_y;
1273    new_particles[base_idx + 2u] = particle_type;
1274    new_particles[base_idx + 3u] = new_energy;
1275}
1276"#;
1277
1278// =====================================================================
1279// ENUMERATIVE GEOMETRY SHADERS
1280// =====================================================================
1281
1282/// Intersection theory computations
1283pub const INTERSECTION_THEORY: &str = r#"
1284struct RationalNumber {
1285    numerator: i32,
1286    denominator: i32,
1287}
1288
1289@group(0) @binding(0) var<storage, read> chow_class_a: array<RationalNumber>;
1290@group(0) @binding(1) var<storage, read> chow_class_b: array<RationalNumber>;
1291@group(0) @binding(2) var<storage, read_write> intersection_result: array<RationalNumber>;
1292@group(0) @binding(3) var<storage, read> geometry_params: array<u32>; // [dimension, degree_a, degree_b]
1293
1294fn gcd(a: u32, b: u32) -> u32 {
1295    if (b == 0u) { return a; }
1296    return gcd(b, a % b);
1297}
1298
1299fn add_rationals(a: RationalNumber, b: RationalNumber) -> RationalNumber {
1300    let num = a.numerator * b.denominator + b.numerator * a.denominator;
1301    let den = a.denominator * b.denominator;
1302    let g = gcd(u32(abs(num)), u32(abs(den)));
1303
1304    return RationalNumber(num / i32(g), den / i32(g));
1305}
1306
1307fn multiply_rationals(a: RationalNumber, b: RationalNumber) -> RationalNumber {
1308    let num = a.numerator * b.numerator;
1309    let den = a.denominator * b.denominator;
1310    let g = gcd(u32(abs(num)), u32(abs(den)));
1311
1312    return RationalNumber(num / i32(g), den / i32(g));
1313}
1314
1315@compute @workgroup_size(256)
1316fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1317    let idx = global_id.x;
1318
1319    if (idx >= arrayLength(&chow_class_a)) {
1320        return;
1321    }
1322
1323    let dimension = geometry_params[0];
1324    let degree_a = geometry_params[1];
1325    let degree_b = geometry_params[2];
1326
1327    // Intersection product in Chow ring: A · B
1328    // For simplicity, implement as pointwise multiplication for this example
1329    let a = chow_class_a[idx];
1330    let b = chow_class_b[idx];
1331
1332    // Check degree compatibility (degree_a + degree_b ≤ dimension)
1333    if (degree_a + degree_b <= dimension) {
1334        intersection_result[idx] = multiply_rationals(a, b);
1335    } else {
1336        intersection_result[idx] = RationalNumber(0, 1); // Zero class
1337    }
1338}
1339"#;
1340
1341/// Schubert calculus computations
1342const SCHUBERT_CALCULUS: &str = r#"
1343@group(0) @binding(0) var<storage, read> partition_a: array<u32>;
1344@group(0) @binding(1) var<storage, read> partition_b: array<u32>;
1345@group(0) @binding(2) var<storage, read_write> littlewood_coeff: array<u32>;
1346@group(0) @binding(3) var<storage, read> grassmann_params: array<u32>; // [n, k]
1347
1348@compute @workgroup_size(128)
1349fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1350    let coeff_idx = global_id.x;
1351
1352    let n = grassmann_params[0]; // Dimension of ambient space
1353    let k = grassmann_params[1]; // Dimension of subspaces
1354
1355    if (coeff_idx >= arrayLength(&littlewood_coeff)) {
1356        return;
1357    }
1358
1359    // Schubert calculus: compute Littlewood-Richardson coefficients
1360    // This is a simplified version - full LR coefficients require more complex algorithms
1361
1362    let max_parts = min(arrayLength(&partition_a), arrayLength(&partition_b));
1363    var coefficient = 0u;
1364
1365    // Simplified intersection number computation
1366    for (var i = 0u; i < max_parts; i = i + 1u) {
1367        let part_a = partition_a[i];
1368        let part_b = partition_b[i];
1369
1370        // Check compatibility with Grassmannian Gr(k, n)
1371        if (part_a <= n - k && part_b <= n - k) {
1372            coefficient += part_a * part_b;
1373        }
1374    }
1375
1376    littlewood_coeff[coeff_idx] = coefficient;
1377}
1378"#;
1379
1380#[cfg(test)]
1381mod tests {
1382    use super::*;
1383
1384    #[test]
1385    fn test_shader_library_creation() {
1386        let library = ShaderLibrary::new();
1387        let shaders = library.list_shaders();
1388
1389        // Should have shaders for all mathematical domains
1390        assert!(shaders.contains(&"tropical_matrix_multiply".to_string()));
1391        assert!(shaders.contains(&"dual_forward_ad".to_string()));
1392        assert!(shaders.contains(&"tropical_dual_clifford".to_string()));
1393        assert!(shaders.contains(&"fisher_information".to_string()));
1394        assert!(shaders.contains(&"ca_evolution".to_string()));
1395        assert!(shaders.contains(&"intersection_theory".to_string()));
1396
1397        // Holographic memory shaders
1398        assert!(shaders.contains(&"holographic_batch_bind".to_string()));
1399        assert!(shaders.contains(&"holographic_batch_similarity".to_string()));
1400        assert!(shaders.contains(&"holographic_bundle_all".to_string()));
1401        assert!(shaders.contains(&"holographic_resonator_step".to_string()));
1402    }
1403
1404    #[test]
1405    fn test_shader_retrieval() {
1406        let library = ShaderLibrary::new();
1407
1408        let shader = library.get_shader("tropical_matrix_multiply");
1409        assert!(shader.is_some());
1410        assert!(shader.unwrap().contains("@compute"));
1411        assert!(shader.unwrap().contains("tropical"));
1412    }
1413
1414    #[test]
1415    fn test_shader_constants() {
1416        assert_eq!(TROPICAL_SHADERS.len(), 3);
1417        assert_eq!(DUAL_SHADERS.len(), 3);
1418        assert_eq!(FUSION_SHADERS.len(), 2);
1419        assert_eq!(HOLOGRAPHIC_SHADERS.len(), 4);
1420    }
1421
1422    #[test]
1423    fn test_holographic_shaders() {
1424        // Verify holographic shaders contain expected WGSL patterns
1425        assert!(HOLOGRAPHIC_BATCH_BIND.contains("@compute"));
1426        assert!(HOLOGRAPHIC_BATCH_BIND.contains("cayley"));
1427
1428        assert!(HOLOGRAPHIC_BATCH_SIMILARITY.contains("@compute"));
1429        assert!(HOLOGRAPHIC_BATCH_SIMILARITY.contains("similarity"));
1430
1431        assert!(HOLOGRAPHIC_BUNDLE_ALL.contains("@compute"));
1432        assert!(HOLOGRAPHIC_BUNDLE_ALL.contains("workgroupBarrier"));
1433
1434        assert!(HOLOGRAPHIC_RESONATOR_STEP.contains("@compute"));
1435        assert!(HOLOGRAPHIC_RESONATOR_STEP.contains("codebook"));
1436    }
1437}
amari_gpu/shaders.rs

amari_gpu/
shaders.rs