amari_gpu/
shaders.rs

1//! WebGPU compute shader library for mathematical operations
2//!
3//! This module contains optimized WGSL compute shaders for various mathematical
4//! domains including tropical algebra, automatic differentiation, and fusion systems.
5
6use std::collections::HashMap;
7
8/// Shader library managing all mathematical compute shaders
9pub struct ShaderLibrary {
10    shaders: HashMap<String, &'static str>,
11}
12
13impl ShaderLibrary {
14    /// Create new shader library with all mathematical shaders
15    pub fn new() -> Self {
16        let mut shaders = HashMap::new();
17
18        // Tropical algebra shaders
19        shaders.insert(
20            "tropical_matrix_multiply".to_string(),
21            TROPICAL_MATRIX_MULTIPLY,
22        );
23        shaders.insert("tropical_vector_add".to_string(), TROPICAL_VECTOR_ADD);
24        shaders.insert(
25            "tropical_neural_network".to_string(),
26            TROPICAL_NEURAL_NETWORK,
27        );
28
29        // Dual number shaders
30        shaders.insert("dual_forward_ad".to_string(), DUAL_FORWARD_AD);
31        shaders.insert("dual_batch_gradient".to_string(), DUAL_BATCH_GRADIENT);
32        shaders.insert("dual_chain_rule".to_string(), DUAL_CHAIN_RULE);
33
34        // Fusion system shaders
35        shaders.insert("tropical_dual_clifford".to_string(), TROPICAL_DUAL_CLIFFORD);
36        shaders.insert("fusion_attention".to_string(), FUSION_ATTENTION);
37
38        // Information geometry shaders
39        shaders.insert("fisher_information".to_string(), FISHER_INFORMATION);
40        shaders.insert("kl_divergence_batch".to_string(), KL_DIVERGENCE_BATCH);
41
42        // Cellular automata shaders
43        shaders.insert("ca_evolution".to_string(), CA_EVOLUTION);
44        shaders.insert("ca_self_assembly".to_string(), CA_SELF_ASSEMBLY);
45        shaders.insert("rule_application".to_string(), RULE_APPLICATION);
46        shaders.insert("energy_calculation".to_string(), ENERGY_CALCULATION);
47        shaders.insert("neighbor_extraction".to_string(), NEIGHBOR_EXTRACTION);
48
49        // Enumerative geometry shaders
50        shaders.insert("intersection_theory".to_string(), INTERSECTION_THEORY);
51        shaders.insert("schubert_calculus".to_string(), SCHUBERT_CALCULUS);
52
53        // Holographic memory shaders
54        shaders.insert("holographic_batch_bind".to_string(), HOLOGRAPHIC_BATCH_BIND);
55        shaders.insert(
56            "holographic_batch_similarity".to_string(),
57            HOLOGRAPHIC_BATCH_SIMILARITY,
58        );
59        shaders.insert("holographic_bundle_all".to_string(), HOLOGRAPHIC_BUNDLE_ALL);
60        shaders.insert(
61            "holographic_resonator_step".to_string(),
62            HOLOGRAPHIC_RESONATOR_STEP,
63        );
64
65        // Topology shaders
66        shaders.insert(
67            "topology_distance_matrix".to_string(),
68            TOPOLOGY_DISTANCE_MATRIX,
69        );
70        shaders.insert(
71            "topology_morse_critical".to_string(),
72            TOPOLOGY_MORSE_CRITICAL,
73        );
74        shaders.insert(
75            "topology_boundary_matrix".to_string(),
76            TOPOLOGY_BOUNDARY_MATRIX,
77        );
78        shaders.insert(
79            "topology_matrix_reduction".to_string(),
80            TOPOLOGY_MATRIX_REDUCTION,
81        );
82
83        Self { shaders }
84    }
85
86    /// Get shader source by name
87    pub fn get_shader(&self, name: &str) -> Option<&'static str> {
88        self.shaders.get(name).copied()
89    }
90
91    /// List all available shaders
92    pub fn list_shaders(&self) -> Vec<String> {
93        self.shaders.keys().cloned().collect()
94    }
95}
96
97impl Default for ShaderLibrary {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103/// Tropical algebra shader collection
104pub const TROPICAL_SHADERS: &[(&str, &str)] = &[
105    ("tropical_matrix_multiply", TROPICAL_MATRIX_MULTIPLY),
106    ("tropical_vector_add", TROPICAL_VECTOR_ADD),
107    ("tropical_neural_network", TROPICAL_NEURAL_NETWORK),
108];
109
110/// Dual number shader collection
111pub const DUAL_SHADERS: &[(&str, &str)] = &[
112    ("dual_forward_ad", DUAL_FORWARD_AD),
113    ("dual_batch_gradient", DUAL_BATCH_GRADIENT),
114    ("dual_chain_rule", DUAL_CHAIN_RULE),
115];
116
117/// Fusion system shader collection
118pub const FUSION_SHADERS: &[(&str, &str)] = &[
119    ("tropical_dual_clifford", TROPICAL_DUAL_CLIFFORD),
120    ("fusion_attention", FUSION_ATTENTION),
121];
122
123/// Holographic memory shader collection
124pub const HOLOGRAPHIC_SHADERS: &[(&str, &str)] = &[
125    ("holographic_batch_bind", HOLOGRAPHIC_BATCH_BIND),
126    ("holographic_batch_similarity", HOLOGRAPHIC_BATCH_SIMILARITY),
127    ("holographic_bundle_all", HOLOGRAPHIC_BUNDLE_ALL),
128    ("holographic_resonator_step", HOLOGRAPHIC_RESONATOR_STEP),
129];
130
131// =====================================================================
132// TROPICAL ALGEBRA SHADERS
133// =====================================================================
134
135/// Tropical (max-plus) matrix multiplication: C = A ⊗ B where ⊗ is tropical product
136const TROPICAL_MATRIX_MULTIPLY: &str = r#"
137@group(0) @binding(0) var<storage, read> matrix_a: array<f32>;
138@group(0) @binding(1) var<storage, read> matrix_b: array<f32>;
139@group(0) @binding(2) var<storage, read_write> result: array<f32>;
140@group(0) @binding(3) var<storage, read> dimensions: array<u32>; // [M, N, K]
141
142@compute @workgroup_size(16, 16)
143fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
144    let M = dimensions[0];
145    let N = dimensions[1];
146    let K = dimensions[2];
147
148    let row = global_id.x;
149    let col = global_id.y;
150
151    if (row >= M || col >= N) {
152        return;
153    }
154
155    // Tropical matrix multiplication: (A ⊗ B)[i,j] = max_k(A[i,k] + B[k,j])
156    var max_val = -3.4028235e+38; // -infinity in tropical algebra
157
158    for (var k = 0u; k < K; k = k + 1u) {
159        let a_val = matrix_a[row * K + k];
160        let b_val = matrix_b[k * N + col];
161
162        // Tropical multiplication: a ⊗ b = a + b
163        let tropical_product = a_val + b_val;
164
165        // Tropical addition: max operation
166        if (tropical_product > max_val) {
167            max_val = tropical_product;
168        }
169    }
170
171    result[row * N + col] = max_val;
172}
173"#;
174
175/// Tropical vector addition: c = a ⊕ b where ⊕ is max operation
176const TROPICAL_VECTOR_ADD: &str = r#"
177@group(0) @binding(0) var<storage, read> vector_a: array<f32>;
178@group(0) @binding(1) var<storage, read> vector_b: array<f32>;
179@group(0) @binding(2) var<storage, read_write> result: array<f32>;
180
181@compute @workgroup_size(256)
182fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
183    let idx = global_id.x;
184
185    if (idx >= arrayLength(&vector_a)) {
186        return;
187    }
188
189    // Tropical addition: a ⊕ b = max(a, b)
190    result[idx] = max(vector_a[idx], vector_b[idx]);
191}
192"#;
193
194/// Tropical neural network layer computation
195const TROPICAL_NEURAL_NETWORK: &str = r#"
196@group(0) @binding(0) var<storage, read> input: array<f32>;
197@group(0) @binding(1) var<storage, read> weights: array<f32>;
198@group(0) @binding(2) var<storage, read> bias: array<f32>;
199@group(0) @binding(3) var<storage, read_write> output: array<f32>;
200@group(0) @binding(4) var<storage, read> dimensions: array<u32>; // [batch_size, input_size, output_size]
201
202@compute @workgroup_size(16, 16)
203fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
204    let batch_idx = global_id.x;
205    let output_idx = global_id.y;
206
207    let batch_size = dimensions[0];
208    let input_size = dimensions[1];
209    let output_size = dimensions[2];
210
211    if (batch_idx >= batch_size || output_idx >= output_size) {
212        return;
213    }
214
215    // Tropical neural network: max-plus linear transformation
216    var max_val = -3.4028235e+38; // -infinity
217
218    for (var i = 0u; i < input_size; i = i + 1u) {
219        let input_val = input[batch_idx * input_size + i];
220        let weight_val = weights[i * output_size + output_idx];
221
222        // Tropical multiplication: input ⊗ weight = input + weight
223        let product = input_val + weight_val;
224
225        // Tropical addition: max operation
226        if (product > max_val) {
227            max_val = product;
228        }
229    }
230
231    // Add bias (tropical addition = max)
232    let bias_val = bias[output_idx];
233    let final_result = max(max_val, bias_val);
234
235    output[batch_idx * output_size + output_idx] = final_result;
236}
237"#;
238
239// =====================================================================
240// DUAL NUMBER SHADERS (AUTOMATIC DIFFERENTIATION)
241// =====================================================================
242
243/// Forward-mode automatic differentiation for dual numbers
244const DUAL_FORWARD_AD: &str = r#"
245struct DualNumber {
246    real: f32,
247    dual: f32, // derivative part
248}
249
250@group(0) @binding(0) var<storage, read> input_dual: array<DualNumber>;
251@group(0) @binding(1) var<storage, read> operation_params: array<f32>;
252@group(0) @binding(2) var<storage, read_write> output_dual: array<DualNumber>;
253
254@compute @workgroup_size(256)
255fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
256    let idx = global_id.x;
257
258    if (idx >= arrayLength(&input_dual)) {
259        return;
260    }
261
262    let x = input_dual[idx];
263    let op_type = u32(operation_params[0]); // Operation type
264
265    var result: DualNumber;
266
267    // Forward-mode AD for different operations
268    switch (op_type) {
269        case 0u: { // sin(x): (sin(x), cos(x) * dx)
270            result.real = sin(x.real);
271            result.dual = cos(x.real) * x.dual;
272        }
273        case 1u: { // exp(x): (exp(x), exp(x) * dx)
274            let exp_val = exp(x.real);
275            result.real = exp_val;
276            result.dual = exp_val * x.dual;
277        }
278        case 2u: { // x^2: (x^2, 2x * dx)
279            result.real = x.real * x.real;
280            result.dual = 2.0 * x.real * x.dual;
281        }
282        case 3u: { // log(x): (log(x), (1/x) * dx)
283            result.real = log(x.real);
284            result.dual = x.dual / x.real;
285        }
286        default: { // identity
287            result = x;
288        }
289    }
290
291    output_dual[idx] = result;
292}
293"#;
294
295/// Batch gradient computation for multiple functions
296const DUAL_BATCH_GRADIENT: &str = r#"
297struct DualNumber {
298    real: f32,
299    dual: f32,
300}
301
302@group(0) @binding(0) var<storage, read> input_batch: array<DualNumber>;
303@group(0) @binding(1) var<storage, read> function_params: array<f32>;
304@group(0) @binding(2) var<storage, read_write> gradients: array<f32>;
305@group(0) @binding(3) var<storage, read> batch_info: array<u32>; // [batch_size, function_dim]
306
307@compute @workgroup_size(16, 16)
308fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
309    let batch_idx = global_id.x;
310    let var_idx = global_id.y;
311
312    let batch_size = batch_info[0];
313    let function_dim = batch_info[1];
314
315    if (batch_idx >= batch_size || var_idx >= function_dim) {
316        return;
317    }
318
319    let input_idx = batch_idx * function_dim + var_idx;
320    let x = input_batch[input_idx];
321
322    // Compute gradient of composite function f(g(x)) where g is parameterized
323    let param_idx = var_idx % 4u; // Assume up to 4 parameters per function
324    let param = function_params[param_idx];
325
326    // Example: f(x) = param * x^2 + sin(x), gradient = 2 * param * x + cos(x)
327    let gradient = 2.0 * param * x.real + cos(x.real);
328
329    gradients[input_idx] = gradient * x.dual;
330}
331"#;
332
333/// Chain rule implementation for complex function compositions
334const DUAL_CHAIN_RULE: &str = r#"
335struct DualNumber {
336    real: f32,
337    dual: f32,
338}
339
340@group(0) @binding(0) var<storage, read> inner_function: array<DualNumber>;
341@group(0) @binding(1) var<storage, read> outer_params: array<f32>;
342@group(0) @binding(2) var<storage, read_write> composed_result: array<DualNumber>;
343
344@compute @workgroup_size(256)
345fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
346    let idx = global_id.x;
347
348    if (idx >= arrayLength(&inner_function)) {
349        return;
350    }
351
352    let u = inner_function[idx]; // u = g(x), du/dx
353    let outer_type = u32(outer_params[0]);
354
355    var result: DualNumber;
356
357    // Chain rule: d/dx[f(g(x))] = f'(g(x)) * g'(x) = f'(u) * du/dx
358    switch (outer_type) {
359        case 0u: { // f(u) = sin(u)
360            result.real = sin(u.real);
361            result.dual = cos(u.real) * u.dual; // cos(u) * du/dx
362        }
363        case 1u: { // f(u) = u^3
364            result.real = u.real * u.real * u.real;
365            result.dual = 3.0 * u.real * u.real * u.dual; // 3u^2 * du/dx
366        }
367        case 2u: { // f(u) = exp(u)
368            let exp_u = exp(u.real);
369            result.real = exp_u;
370            result.dual = exp_u * u.dual; // exp(u) * du/dx
371        }
372        default: { // f(u) = u (identity)
373            result = u;
374        }
375    }
376
377    composed_result[idx] = result;
378}
379"#;
380
381// =====================================================================
382// FUSION SYSTEM SHADERS
383// =====================================================================
384
385/// TropicalDualClifford operations for LLM evaluation
386const TROPICAL_DUAL_CLIFFORD: &str = r#"
387struct TropicalNumber {
388    value: f32, // Tropical number value
389}
390
391struct DualNumber {
392    real: f32,
393    dual: f32,
394}
395
396struct Multivector {
397    coeffs: array<f32, 8>, // 3D Clifford algebra: 8 basis elements
398}
399
400struct TropicalDualClifford {
401    tropical: TropicalNumber,
402    dual: DualNumber,
403    clifford: Multivector,
404}
405
406@group(0) @binding(0) var<storage, read> input_batch: array<TropicalDualClifford>;
407@group(0) @binding(1) var<storage, read> operation_params: array<f32>;
408@group(0) @binding(2) var<storage, read_write> output_batch: array<TropicalDualClifford>;
409
410@compute @workgroup_size(64)
411fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
412    let idx = global_id.x;
413
414    if (idx >= arrayLength(&input_batch)) {
415        return;
416    }
417
418    let tdc = input_batch[idx];
419    let op_type = u32(operation_params[0]);
420
421    var result: TropicalDualClifford;
422
423    switch (op_type) {
424        case 0u: { // LLM attention computation
425            // Combine tropical path selection with dual gradients and geometric transformations
426            result.tropical.value = max(tdc.tropical.value, operation_params[1]);
427            result.dual.real = tdc.dual.real * operation_params[2];
428            result.dual.dual = tdc.dual.dual * operation_params[2];
429
430            // Geometric rotation in Clifford algebra
431            let angle = operation_params[3];
432            let cos_half = cos(angle * 0.5);
433            let sin_half = sin(angle * 0.5);
434
435            // Simple rotation around e12 plane
436            result.clifford.coeffs[0] = cos_half * tdc.clifford.coeffs[0]; // scalar
437            result.clifford.coeffs[1] = tdc.clifford.coeffs[1]; // e1
438            result.clifford.coeffs[2] = tdc.clifford.coeffs[2]; // e2
439            result.clifford.coeffs[3] = tdc.clifford.coeffs[3]; // e3
440            result.clifford.coeffs[4] = sin_half * tdc.clifford.coeffs[0]; // e12
441            result.clifford.coeffs[5] = tdc.clifford.coeffs[5]; // e13
442            result.clifford.coeffs[6] = tdc.clifford.coeffs[6]; // e23
443            result.clifford.coeffs[7] = tdc.clifford.coeffs[7]; // e123
444        }
445        default: {
446            result = tdc;
447        }
448    }
449
450    output_batch[idx] = result;
451}
452"#;
453
454/// Fusion attention mechanism using tropical algebra
455const FUSION_ATTENTION: &str = r#"
456@group(0) @binding(0) var<storage, read> queries: array<f32>;
457@group(0) @binding(1) var<storage, read> keys: array<f32>;
458@group(0) @binding(2) var<storage, read> values: array<f32>;
459@group(0) @binding(3) var<storage, read_write> attention_output: array<f32>;
460@group(0) @binding(4) var<storage, read> dimensions: array<u32>; // [seq_len, d_model]
461
462@compute @workgroup_size(16, 16)
463fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
464    let seq_pos = global_id.x;
465    let feature_idx = global_id.y;
466
467    let seq_len = dimensions[0];
468    let d_model = dimensions[1];
469
470    if (seq_pos >= seq_len || feature_idx >= d_model) {
471        return;
472    }
473
474    // Tropical attention: use max-plus algebra instead of softmax
475    var max_score = -3.4028235e+38; // -infinity
476    var best_key_idx = 0u;
477
478    // Find the key with maximum tropical attention score
479    for (var key_idx = 0u; key_idx < seq_len; key_idx = key_idx + 1u) {
480        var score = -3.4028235e+38;
481
482        // Compute tropical dot product: sum becomes max, product becomes sum
483        for (var d = 0u; d < d_model; d = d + 1u) {
484            let q = queries[seq_pos * d_model + d];
485            let k = keys[key_idx * d_model + d];
486
487            // Tropical multiplication: q ⊗ k = q + k
488            let tropical_product = q + k;
489
490            // Tropical sum: max operation
491            if (tropical_product > score) {
492                score = tropical_product;
493            }
494        }
495
496        if (score > max_score) {
497            max_score = score;
498            best_key_idx = key_idx;
499        }
500    }
501
502    // Tropical attention: select value from best key (winner-takes-all)
503    attention_output[seq_pos * d_model + feature_idx] =
504        values[best_key_idx * d_model + feature_idx];
505}
506"#;
507
508// =====================================================================
509// HOLOGRAPHIC MEMORY SHADERS
510// =====================================================================
511
512/// Batch binding operation for holographic memory
513/// Computes key ⊛ value for multiple pairs using Clifford geometric product
514pub const HOLOGRAPHIC_BATCH_BIND: &str = r#"
515// TropicalDualClifford representation for GPU
516// We use a simplified 8-dimensional Clifford representation
517struct TDC {
518    // Tropical component (max element)
519    tropical: f32,
520    // Dual component (real and dual parts)
521    dual_real: f32,
522    dual_dual: f32,
523    // Clifford algebra coefficients (8D: scalar, 3 vectors, 3 bivectors, pseudoscalar)
524    clifford: array<f32, 8>,
525    // Padding for alignment
526    _padding: array<f32, 5>,
527}
528
529@group(0) @binding(0) var<storage, read> keys: array<TDC>;
530@group(0) @binding(1) var<storage, read> values: array<TDC>;
531@group(0) @binding(2) var<storage, read_write> results: array<TDC>;
532@group(0) @binding(3) var<uniform> params: array<u32, 4>; // [count, 0, 0, 0]
533
534// Cayley table for 3D Clifford algebra Cl(3,0)
535// Product signs: e_i * e_j where i,j are grade indices
536fn cayley_sign(i: u32, j: u32) -> f32 {
537    // Simplified: for vectors e_i * e_i = 1, e_i * e_j = -e_j * e_i for i != j
538    let signs = array<array<f32, 8>, 8>(
539        // 1    e1   e2   e3   e12  e13  e23  e123
540        array<f32, 8>(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0),   // 1
541        array<f32, 8>(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, -1.0, 1.0),  // e1
542        array<f32, 8>(1.0, -1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0), // e2
543        array<f32, 8>(1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0), // e3
544        array<f32, 8>(1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, 1.0), // e12
545        array<f32, 8>(1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, 1.0), // e13
546        array<f32, 8>(1.0, 1.0, -1.0, 1.0, -1.0, -1.0, -1.0, 1.0), // e23
547        array<f32, 8>(1.0, 1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0), // e123
548    );
549    return signs[i][j];
550}
551
552// Result index for e_i * e_j
553fn cayley_index(i: u32, j: u32) -> u32 {
554    let indices = array<array<u32, 8>, 8>(
555        // 1    e1   e2   e3   e12  e13  e23  e123
556        array<u32, 8>(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u),   // 1
557        array<u32, 8>(1u, 0u, 4u, 5u, 2u, 3u, 7u, 6u),   // e1
558        array<u32, 8>(2u, 4u, 0u, 6u, 1u, 7u, 3u, 5u),   // e2
559        array<u32, 8>(3u, 5u, 6u, 0u, 7u, 1u, 2u, 4u),   // e3
560        array<u32, 8>(4u, 2u, 1u, 7u, 0u, 6u, 5u, 3u),   // e12
561        array<u32, 8>(5u, 3u, 7u, 1u, 6u, 0u, 4u, 2u),   // e13
562        array<u32, 8>(6u, 7u, 3u, 2u, 5u, 4u, 0u, 1u),   // e23
563        array<u32, 8>(7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u),   // e123
564    );
565    return indices[i][j];
566}
567
568@compute @workgroup_size(64)
569fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
570    let idx = global_id.x;
571    let count = params[0];
572
573    if (idx >= count) {
574        return;
575    }
576
577    let key = keys[idx];
578    let value = values[idx];
579
580    var result: TDC;
581
582    // Binding uses geometric product on Clifford components
583    // result = key * value (geometric product)
584    for (var i = 0u; i < 8u; i = i + 1u) {
585        result.clifford[i] = 0.0;
586    }
587
588    for (var i = 0u; i < 8u; i = i + 1u) {
589        for (var j = 0u; j < 8u; j = j + 1u) {
590            let target = cayley_index(i, j);
591            let sign = cayley_sign(i, j);
592            result.clifford[target] += sign * key.clifford[i] * value.clifford[j];
593        }
594    }
595
596    // Tropical: max of both (binding produces new tropical value)
597    result.tropical = max(key.tropical, value.tropical);
598
599    // Dual: product rule for dual numbers
600    result.dual_real = key.dual_real * value.dual_real;
601    result.dual_dual = key.dual_real * value.dual_dual + key.dual_dual * value.dual_real;
602
603    results[idx] = result;
604}
605"#;
606
607/// Batch similarity computation for holographic vectors
608/// Computes pairwise similarities using inner product with reverse: <A B̃>₀
609pub const HOLOGRAPHIC_BATCH_SIMILARITY: &str = r#"
610struct TDC {
611    tropical: f32,
612    dual_real: f32,
613    dual_dual: f32,
614    clifford: array<f32, 8>,
615    _padding: array<f32, 5>,
616}
617
618@group(0) @binding(0) var<storage, read> vectors_a: array<TDC>;
619@group(0) @binding(1) var<storage, read> vectors_b: array<TDC>;
620@group(0) @binding(2) var<storage, read_write> similarities: array<f32>;
621@group(0) @binding(3) var<uniform> params: array<u32, 4>; // [count_a, count_b, mode, 0]
622                                                          // mode: 0=pairwise (a[i] vs b[i]), 1=matrix (all pairs)
623
624// Compute reverse of multivector (flip sign of grades 2 and 3)
625fn reverse_sign(grade: u32) -> f32 {
626    // Grade 0: +1, Grade 1: +1, Grade 2: -1, Grade 3: -1
627    // For Cl(3,0): indices 0=scalar(g0), 1-3=vectors(g1), 4-6=bivectors(g2), 7=trivector(g3)
628    let signs = array<f32, 8>(1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0);
629    return signs[grade];
630}
631
632// Compute scalar product <A B̃>₀ - the proper inner product for similarity
633fn scalar_product_with_reverse(a: TDC, b: TDC) -> f32 {
634    var result = 0.0;
635
636    // For each basis element, compute contribution to scalar part
637    // Using simplified formula: sum of a[i] * b[i] * reverse_sign(i) * cayley_contribution_to_scalar
638    // For diagonal elements (same basis): e_i * e_i contributes to scalar
639    for (var i = 0u; i < 8u; i = i + 1u) {
640        result += a.clifford[i] * b.clifford[i] * reverse_sign(i);
641    }
642
643    return result;
644}
645
646fn norm(v: TDC) -> f32 {
647    var sum = 0.0;
648    for (var i = 0u; i < 8u; i = i + 1u) {
649        sum += v.clifford[i] * v.clifford[i];
650    }
651    return sqrt(sum);
652}
653
654@compute @workgroup_size(256)
655fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
656    let idx = global_id.x;
657    let count_a = params[0];
658    let count_b = params[1];
659    let mode = params[2];
660
661    if (mode == 0u) {
662        // Pairwise mode: similarities[i] = sim(a[i], b[i])
663        if (idx >= count_a) {
664            return;
665        }
666
667        let a = vectors_a[idx];
668        let b = vectors_b[idx];
669
670        let norm_a = norm(a);
671        let norm_b = norm(b);
672
673        if (norm_a < 1e-10 || norm_b < 1e-10) {
674            similarities[idx] = 0.0;
675            return;
676        }
677
678        let inner = scalar_product_with_reverse(a, b);
679        similarities[idx] = inner / (norm_a * norm_b);
680    } else {
681        // Matrix mode: similarities[i * count_b + j] = sim(a[i], b[j])
682        let total = count_a * count_b;
683        if (idx >= total) {
684            return;
685        }
686
687        let i = idx / count_b;
688        let j = idx % count_b;
689
690        let a = vectors_a[i];
691        let b = vectors_b[j];
692
693        let norm_a = norm(a);
694        let norm_b = norm(b);
695
696        if (norm_a < 1e-10 || norm_b < 1e-10) {
697            similarities[idx] = 0.0;
698            return;
699        }
700
701        let inner = scalar_product_with_reverse(a, b);
702        similarities[idx] = inner / (norm_a * norm_b);
703    }
704}
705"#;
706
707/// Bundle all vectors into a superposition (weighted average)
708pub const HOLOGRAPHIC_BUNDLE_ALL: &str = r#"
709struct TDC {
710    tropical: f32,
711    dual_real: f32,
712    dual_dual: f32,
713    clifford: array<f32, 8>,
714    _padding: array<f32, 5>,
715}
716
717@group(0) @binding(0) var<storage, read> vectors: array<TDC>;
718@group(0) @binding(1) var<storage, read_write> result: array<TDC>; // Single output
719@group(0) @binding(2) var<uniform> params: vec4<f32>; // [count, beta, normalize, 0]
720
721// Workgroup shared memory for parallel reduction
722var<workgroup> shared_clifford: array<array<f32, 8>, 64>;
723var<workgroup> shared_tropical: array<f32, 64>;
724var<workgroup> shared_dual_real: array<f32, 64>;
725var<workgroup> shared_dual_dual: array<f32, 64>;
726
727@compute @workgroup_size(64)
728fn main(
729    @builtin(global_invocation_id) global_id: vec3<u32>,
730    @builtin(local_invocation_id) local_id: vec3<u32>,
731    @builtin(workgroup_id) workgroup_id: vec3<u32>
732) {
733    let idx = global_id.x;
734    let local_idx = local_id.x;
735    let count = u32(params.x);
736    let beta = params.y;
737    let do_normalize = params.z > 0.5;
738
739    // Initialize shared memory
740    for (var i = 0u; i < 8u; i = i + 1u) {
741        shared_clifford[local_idx][i] = 0.0;
742    }
743    shared_tropical[local_idx] = -3.4028235e+38; // -inf for tropical
744    shared_dual_real[local_idx] = 0.0;
745    shared_dual_dual[local_idx] = 0.0;
746
747    // Load data into shared memory
748    if (idx < count) {
749        let v = vectors[idx];
750        for (var i = 0u; i < 8u; i = i + 1u) {
751            shared_clifford[local_idx][i] = v.clifford[i];
752        }
753        shared_tropical[local_idx] = v.tropical;
754        shared_dual_real[local_idx] = v.dual_real;
755        shared_dual_dual[local_idx] = v.dual_dual;
756    }
757
758    workgroupBarrier();
759
760    // Parallel reduction
761    for (var stride = 32u; stride > 0u; stride = stride / 2u) {
762        if (local_idx < stride && local_idx + stride < 64u) {
763            // Bundle Clifford components (sum/average)
764            for (var i = 0u; i < 8u; i = i + 1u) {
765                shared_clifford[local_idx][i] += shared_clifford[local_idx + stride][i];
766            }
767            // Tropical: take max
768            shared_tropical[local_idx] = max(shared_tropical[local_idx], shared_tropical[local_idx + stride]);
769            // Dual: sum
770            shared_dual_real[local_idx] += shared_dual_real[local_idx + stride];
771            shared_dual_dual[local_idx] += shared_dual_dual[local_idx + stride];
772        }
773        workgroupBarrier();
774    }
775
776    // Thread 0 writes result
777    if (local_idx == 0u) {
778        var final_result: TDC;
779
780        // Average the Clifford components
781        let scale = 1.0 / f32(count);
782        for (var i = 0u; i < 8u; i = i + 1u) {
783            final_result.clifford[i] = shared_clifford[0][i] * scale;
784        }
785
786        final_result.tropical = shared_tropical[0];
787        final_result.dual_real = shared_dual_real[0] * scale;
788        final_result.dual_dual = shared_dual_dual[0] * scale;
789
790        // Optionally normalize
791        if (do_normalize) {
792            var norm_sq = 0.0;
793            for (var i = 0u; i < 8u; i = i + 1u) {
794                norm_sq += final_result.clifford[i] * final_result.clifford[i];
795            }
796            let norm = sqrt(norm_sq);
797            if (norm > 1e-10) {
798                let inv_norm = 1.0 / norm;
799                for (var i = 0u; i < 8u; i = i + 1u) {
800                    final_result.clifford[i] *= inv_norm;
801                }
802            }
803        }
804
805        result[workgroup_id.x] = final_result;
806    }
807}
808"#;
809
810/// Resonator cleanup step - computes similarities against codebook
811pub const HOLOGRAPHIC_RESONATOR_STEP: &str = r#"
812struct TDC {
813    tropical: f32,
814    dual_real: f32,
815    dual_dual: f32,
816    clifford: array<f32, 8>,
817    _padding: array<f32, 5>,
818}
819
820struct ResonatorOutput {
821    cleaned: TDC,
822    best_index: u32,
823    best_similarity: f32,
824    _padding: array<f32, 2>,
825}
826
827@group(0) @binding(0) var<storage, read> input: TDC;
828@group(0) @binding(1) var<storage, read> codebook: array<TDC>;
829@group(0) @binding(2) var<storage, read_write> output: ResonatorOutput;
830@group(0) @binding(3) var<uniform> params: array<u32, 4>; // [codebook_size, max_iterations, 0, 0]
831
832fn reverse_sign(grade: u32) -> f32 {
833    let signs = array<f32, 8>(1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0);
834    return signs[grade];
835}
836
837fn scalar_product_with_reverse(a: TDC, b: TDC) -> f32 {
838    var result = 0.0;
839    for (var i = 0u; i < 8u; i = i + 1u) {
840        result += a.clifford[i] * b.clifford[i] * reverse_sign(i);
841    }
842    return result;
843}
844
845fn norm(v: TDC) -> f32 {
846    var sum = 0.0;
847    for (var i = 0u; i < 8u; i = i + 1u) {
848        sum += v.clifford[i] * v.clifford[i];
849    }
850    return sqrt(sum);
851}
852
853fn similarity(a: TDC, b: TDC) -> f32 {
854    let norm_a = norm(a);
855    let norm_b = norm(b);
856    if (norm_a < 1e-10 || norm_b < 1e-10) {
857        return 0.0;
858    }
859    return scalar_product_with_reverse(a, b) / (norm_a * norm_b);
860}
861
862// Workgroup shared memory for parallel max finding
863var<workgroup> shared_best_sim: array<f32, 256>;
864var<workgroup> shared_best_idx: array<u32, 256>;
865
866@compute @workgroup_size(256)
867fn main(
868    @builtin(global_invocation_id) global_id: vec3<u32>,
869    @builtin(local_invocation_id) local_id: vec3<u32>
870) {
871    let idx = global_id.x;
872    let local_idx = local_id.x;
873    let codebook_size = params[0];
874
875    // Initialize
876    shared_best_sim[local_idx] = -2.0; // Below minimum similarity
877    shared_best_idx[local_idx] = 0u;
878
879    // Each thread computes similarity for one codebook entry
880    if (idx < codebook_size) {
881        let sim = similarity(input, codebook[idx]);
882        shared_best_sim[local_idx] = sim;
883        shared_best_idx[local_idx] = idx;
884    }
885
886    workgroupBarrier();
887
888    // Parallel reduction to find max
889    for (var stride = 128u; stride > 0u; stride = stride / 2u) {
890        if (local_idx < stride && local_idx + stride < 256u) {
891            if (shared_best_sim[local_idx + stride] > shared_best_sim[local_idx]) {
892                shared_best_sim[local_idx] = shared_best_sim[local_idx + stride];
893                shared_best_idx[local_idx] = shared_best_idx[local_idx + stride];
894            }
895        }
896        workgroupBarrier();
897    }
898
899    // Thread 0 writes result
900    if (local_idx == 0u) {
901        let best_idx = shared_best_idx[0];
902        output.cleaned = codebook[best_idx];
903        output.best_index = best_idx;
904        output.best_similarity = shared_best_sim[0];
905    }
906}
907"#;
908
909// =====================================================================
910// INFORMATION GEOMETRY SHADERS
911// =====================================================================
912
913/// Fisher information matrix computation
914const FISHER_INFORMATION: &str = r#"
915@group(0) @binding(0) var<storage, read> probability_params: array<f32>;
916@group(0) @binding(1) var<storage, read> data_points: array<f32>;
917@group(0) @binding(2) var<storage, read_write> fisher_matrix: array<f32>;
918@group(0) @binding(3) var<storage, read> dimensions: array<u32>; // [n_params, n_data]
919
920@compute @workgroup_size(16, 16)
921fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
922    let param_i = global_id.x;
923    let param_j = global_id.y;
924
925    let n_params = dimensions[0];
926    let n_data = dimensions[1];
927
928    if (param_i >= n_params || param_j >= n_params) {
929        return;
930    }
931
932    // Fisher Information Matrix: I[i,j] = E[∂²log p(x|θ)/∂θᵢ∂θⱼ]
933    var fisher_element = 0.0;
934
935    for (var data_idx = 0u; data_idx < n_data; data_idx = data_idx + 1u) {
936        let x = data_points[data_idx];
937
938        // Gaussian log-likelihood example: log p(x|μ,σ) = -½log(2πσ²) - (x-μ)²/(2σ²)
939        let mu = probability_params[0];
940        let sigma = probability_params[1];
941        let sigma_sq = sigma * sigma;
942
943        var d2_log_p = 0.0;
944
945        if (param_i == 0u && param_j == 0u) { // ∂²/∂μ²
946            d2_log_p = -1.0 / sigma_sq;
947        } else if (param_i == 1u && param_j == 1u) { // ∂²/∂σ²
948            let diff = x - mu;
949            d2_log_p = -1.0 / sigma_sq + 3.0 * diff * diff / (sigma_sq * sigma_sq);
950        } else if ((param_i == 0u && param_j == 1u) || (param_i == 1u && param_j == 0u)) { // ∂²/∂μ∂σ
951            let diff = x - mu;
952            d2_log_p = 2.0 * diff / (sigma_sq * sigma);
953        }
954
955        fisher_element += -d2_log_p; // Fisher = -E[Hessian of log-likelihood]
956    }
957
958    fisher_matrix[param_i * n_params + param_j] = fisher_element / f32(n_data);
959}
960"#;
961
962/// Batch KL divergence computation
963const KL_DIVERGENCE_BATCH: &str = r#"
964@group(0) @binding(0) var<storage, read> distribution_p: array<f32>;
965@group(0) @binding(1) var<storage, read> distribution_q: array<f32>;
966@group(0) @binding(2) var<storage, read_write> kl_divergences: array<f32>;
967@group(0) @binding(3) var<storage, read> batch_info: array<u32>; // [batch_size, dist_size]
968
969@compute @workgroup_size(256)
970fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
971    let batch_idx = global_id.x;
972    let batch_size = batch_info[0];
973    let dist_size = batch_info[1];
974
975    if (batch_idx >= batch_size) {
976        return;
977    }
978
979    // KL divergence: D_KL(P||Q) = Σ P(x) log(P(x)/Q(x))
980    var kl_div = 0.0;
981
982    for (var i = 0u; i < dist_size; i = i + 1u) {
983        let p_i = distribution_p[batch_idx * dist_size + i];
984        let q_i = distribution_q[batch_idx * dist_size + i];
985
986        if (p_i > 1e-10 && q_i > 1e-10) { // Avoid log(0)
987            kl_div += p_i * log(p_i / q_i);
988        }
989    }
990
991    kl_divergences[batch_idx] = kl_div;
992}
993"#;
994
995// =====================================================================
996// CELLULAR AUTOMATA SHADERS
997// =====================================================================
998
999/// Cellular automata evolution step
1000pub const CA_EVOLUTION: &str = r#"
1001@group(0) @binding(0) var<storage, read> current_state: array<u32>;
1002@group(0) @binding(1) var<storage, read_write> next_state: array<u32>;
1003@group(0) @binding(2) var<storage, read> rules: array<u32>;
1004@group(0) @binding(3) var<storage, read> dimensions: array<u32>; // [width, height]
1005
1006@compute @workgroup_size(16, 16)
1007fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1008    let x = global_id.x;
1009    let y = global_id.y;
1010
1011    let width = dimensions[0];
1012    let height = dimensions[1];
1013
1014    if (x >= width || y >= height) {
1015        return;
1016    }
1017
1018    let idx = y * width + x;
1019    let current_cell = current_state[idx];
1020
1021    // Count alive neighbors (Moore neighborhood)
1022    var alive_neighbors = 0u;
1023
1024    for (var dy = 0u; dy < 3u; dy = dy + 1u) {
1025        for (var dx = 0u; dx < 3u; dx = dx + 1u) {
1026            if (dx == 1u && dy == 1u) { continue; } // Skip center cell
1027
1028            let nx = (x + dx + width - 1u) % width; // Wrap around
1029            let ny = (y + dy + height - 1u) % height;
1030            let neighbor_idx = ny * width + nx;
1031
1032            if (current_state[neighbor_idx] == 1u) {
1033                alive_neighbors = alive_neighbors + 1u;
1034            }
1035        }
1036    }
1037
1038    // Conway's Game of Life rules (can be customized via rules buffer)
1039    var new_state = 0u;
1040
1041    if (current_cell == 1u) { // Currently alive
1042        if (alive_neighbors == 2u || alive_neighbors == 3u) {
1043            new_state = 1u; // Survive
1044        }
1045    } else { // Currently dead
1046        if (alive_neighbors == 3u) {
1047            new_state = 1u; // Birth
1048        }
1049    }
1050
1051    next_state[idx] = new_state;
1052}
1053"#;
1054
1055/// Rule application for geometric algebra cellular automata
1056pub const RULE_APPLICATION: &str = r#"
1057struct GpuCellData {
1058    scalar: f32,
1059    e1: f32,
1060    e2: f32,
1061    e3: f32,
1062    e12: f32,
1063    e13: f32,
1064    e23: f32,
1065    e123: f32,
1066    generation: f32,
1067    neighborhood_size: f32,
1068    rule_type: f32,
1069    boundary_condition: f32,
1070    padding: array<f32, 4>,
1071}
1072
1073struct GpuRuleConfig {
1074    rule_type: f32,
1075    threshold: f32,
1076    damping_factor: f32,
1077    energy_conservation: f32,
1078    time_step: f32,
1079    spatial_scale: f32,
1080    geometric_weight: f32,
1081    nonlinear_factor: f32,
1082    boundary_type: f32,
1083    neighborhood_radius: f32,
1084    evolution_speed: f32,
1085    stability_factor: f32,
1086    padding: array<f32, 4>,
1087}
1088
1089@group(0) @binding(0) var<storage, read> cells: array<GpuCellData>;
1090@group(0) @binding(1) var<storage, read> rules: array<GpuRuleConfig>;
1091@group(0) @binding(2) var<storage, read_write> output: array<GpuCellData>;
1092
1093@compute @workgroup_size(256)
1094fn rule_application_main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1095    let idx = global_id.x;
1096
1097    if (idx >= arrayLength(&cells)) {
1098        return;
1099    }
1100
1101    let cell = cells[idx];
1102    let rule = rules[0]; // Use first rule for now
1103
1104    var new_cell = cell;
1105
1106    // Apply damping factor
1107    new_cell.scalar = cell.scalar * (1.0 - rule.damping_factor);
1108    new_cell.e1 = cell.e1 * (1.0 - rule.damping_factor);
1109    new_cell.e2 = cell.e2 * (1.0 - rule.damping_factor);
1110    new_cell.e3 = cell.e3 * (1.0 - rule.damping_factor);
1111    new_cell.e12 = cell.e12 * (1.0 - rule.damping_factor);
1112    new_cell.e13 = cell.e13 * (1.0 - rule.damping_factor);
1113    new_cell.e23 = cell.e23 * (1.0 - rule.damping_factor);
1114    new_cell.e123 = cell.e123 * (1.0 - rule.damping_factor);
1115
1116    // Apply threshold
1117    if (abs(new_cell.scalar) < rule.threshold) {
1118        new_cell.scalar = 0.0;
1119    }
1120
1121    output[idx] = new_cell;
1122}
1123"#;
1124
1125/// Energy calculation for cellular automata
1126pub const ENERGY_CALCULATION: &str = r#"
1127struct GpuCellData {
1128    scalar: f32,
1129    e1: f32,
1130    e2: f32,
1131    e3: f32,
1132    e12: f32,
1133    e13: f32,
1134    e23: f32,
1135    e123: f32,
1136    generation: f32,
1137    neighborhood_size: f32,
1138    rule_type: f32,
1139    boundary_condition: f32,
1140    padding: array<f32, 4>,
1141}
1142
1143@group(0) @binding(0) var<storage, read> cells: array<GpuCellData>;
1144@group(0) @binding(1) var<storage, read_write> total_energy: array<f32>;
1145
1146@compute @workgroup_size(1)
1147fn energy_calculation_main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1148    var energy = 0.0;
1149
1150    // Sum the squared magnitudes of all multivector components
1151    for (var i = 0u; i < arrayLength(&cells); i = i + 1u) {
1152        let cell = cells[i];
1153        energy += cell.scalar * cell.scalar;
1154        energy += cell.e1 * cell.e1;
1155        energy += cell.e2 * cell.e2;
1156        energy += cell.e3 * cell.e3;
1157        energy += cell.e12 * cell.e12;
1158        energy += cell.e13 * cell.e13;
1159        energy += cell.e23 * cell.e23;
1160        energy += cell.e123 * cell.e123;
1161    }
1162
1163    total_energy[0] = energy;
1164}
1165"#;
1166
1167/// Neighbor extraction for cellular automata
1168pub const NEIGHBOR_EXTRACTION: &str = r#"
1169struct GpuCellData {
1170    scalar: f32,
1171    e1: f32,
1172    e2: f32,
1173    e3: f32,
1174    e12: f32,
1175    e13: f32,
1176    e23: f32,
1177    e123: f32,
1178    generation: f32,
1179    neighborhood_size: f32,
1180    rule_type: f32,
1181    boundary_condition: f32,
1182    padding: array<f32, 4>,
1183}
1184
1185@group(0) @binding(0) var<storage, read> cells: array<GpuCellData>;
1186@group(0) @binding(1) var<uniform> params: array<f32, 4>; // [width, height, total_cells, padding]
1187@group(0) @binding(2) var<storage, read_write> neighborhoods: array<GpuCellData>;
1188
1189@compute @workgroup_size(256)
1190fn neighbor_extraction_main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1191    let idx = global_id.x;
1192    let width = u32(params[0]);
1193    let height = u32(params[1]);
1194    let total_cells = u32(params[2]);
1195
1196    if (idx >= total_cells) {
1197        return;
1198    }
1199
1200    // Calculate 2D position from linear index
1201    let x = idx % width;
1202    let y = idx / width;
1203
1204    // Moore neighborhood: 8 neighbors
1205    let offsets = array<vec2<i32>, 8>(
1206        vec2<i32>(-1, -1), vec2<i32>(0, -1), vec2<i32>(1, -1),
1207        vec2<i32>(-1,  0),                   vec2<i32>(1,  0),
1208        vec2<i32>(-1,  1), vec2<i32>(0,  1), vec2<i32>(1,  1)
1209    );
1210
1211    // Extract neighbors with wrapping boundaries
1212    for (var i = 0u; i < 8u; i = i + 1u) {
1213        let offset = offsets[i];
1214        let nx = (i32(x) + offset.x + i32(width)) % i32(width);
1215        let ny = (i32(y) + offset.y + i32(height)) % i32(height);
1216        let neighbor_idx = u32(ny) * width + u32(nx);
1217
1218        // Store neighbor in output array
1219        neighborhoods[idx * 8u + i] = cells[neighbor_idx];
1220    }
1221}
1222"#;
1223
1224/// Self-assembly pattern formation
1225const CA_SELF_ASSEMBLY: &str = r#"
1226@group(0) @binding(0) var<storage, read> particles: array<f32>; // [x, y, type, energy]
1227@group(0) @binding(1) var<storage, read_write> new_particles: array<f32>;
1228@group(0) @binding(2) var<storage, read> assembly_rules: array<f32>;
1229@group(0) @binding(3) var<storage, read> simulation_params: array<u32>; // [n_particles, grid_size]
1230
1231@compute @workgroup_size(64)
1232fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1233    let particle_idx = global_id.x;
1234    let n_particles = simulation_params[0];
1235    let grid_size = simulation_params[1];
1236
1237    if (particle_idx >= n_particles) {
1238        return;
1239    }
1240
1241    let base_idx = particle_idx * 4u;
1242    let x = particles[base_idx];
1243    let y = particles[base_idx + 1u];
1244    let particle_type = particles[base_idx + 2u];
1245    let energy = particles[base_idx + 3u];
1246
1247    // Self-assembly based on local interactions
1248    var new_x = x;
1249    var new_y = y;
1250    var new_energy = energy;
1251
1252    // Calculate forces from nearby particles
1253    var force_x = 0.0;
1254    var force_y = 0.0;
1255
1256    for (var other_idx = 0u; other_idx < n_particles; other_idx = other_idx + 1u) {
1257        if (other_idx == particle_idx) { continue; }
1258
1259        let other_base = other_idx * 4u;
1260        let other_x = particles[other_base];
1261        let other_y = particles[other_base + 1u];
1262        let other_type = particles[other_base + 2u];
1263
1264        let dx = other_x - x;
1265        let dy = other_y - y;
1266        let distance = sqrt(dx * dx + dy * dy);
1267
1268        if (distance < 5.0 && distance > 0.1) { // Interaction range
1269            let interaction_strength = assembly_rules[u32(particle_type) * 4u + u32(other_type)];
1270
1271            // Attractive/repulsive force based on particle types
1272            let force_magnitude = interaction_strength / (distance * distance);
1273            force_x += force_magnitude * dx / distance;
1274            force_y += force_magnitude * dy / distance;
1275        }
1276    }
1277
1278    // Update position based on forces
1279    new_x += force_x * 0.1; // time step
1280    new_y += force_y * 0.1;
1281
1282    // Keep within bounds
1283    new_x = clamp(new_x, 0.0, f32(grid_size));
1284    new_y = clamp(new_y, 0.0, f32(grid_size));
1285
1286    // Energy dissipation
1287    new_energy = energy * 0.99;
1288
1289    new_particles[base_idx] = new_x;
1290    new_particles[base_idx + 1u] = new_y;
1291    new_particles[base_idx + 2u] = particle_type;
1292    new_particles[base_idx + 3u] = new_energy;
1293}
1294"#;
1295
1296// =====================================================================
1297// ENUMERATIVE GEOMETRY SHADERS
1298// =====================================================================
1299
1300/// Intersection theory computations
1301pub const INTERSECTION_THEORY: &str = r#"
1302struct RationalNumber {
1303    numerator: i32,
1304    denominator: i32,
1305}
1306
1307@group(0) @binding(0) var<storage, read> chow_class_a: array<RationalNumber>;
1308@group(0) @binding(1) var<storage, read> chow_class_b: array<RationalNumber>;
1309@group(0) @binding(2) var<storage, read_write> intersection_result: array<RationalNumber>;
1310@group(0) @binding(3) var<storage, read> geometry_params: array<u32>; // [dimension, degree_a, degree_b]
1311
1312fn gcd(a: u32, b: u32) -> u32 {
1313    if (b == 0u) { return a; }
1314    return gcd(b, a % b);
1315}
1316
1317fn add_rationals(a: RationalNumber, b: RationalNumber) -> RationalNumber {
1318    let num = a.numerator * b.denominator + b.numerator * a.denominator;
1319    let den = a.denominator * b.denominator;
1320    let g = gcd(u32(abs(num)), u32(abs(den)));
1321
1322    return RationalNumber(num / i32(g), den / i32(g));
1323}
1324
1325fn multiply_rationals(a: RationalNumber, b: RationalNumber) -> RationalNumber {
1326    let num = a.numerator * b.numerator;
1327    let den = a.denominator * b.denominator;
1328    let g = gcd(u32(abs(num)), u32(abs(den)));
1329
1330    return RationalNumber(num / i32(g), den / i32(g));
1331}
1332
1333@compute @workgroup_size(256)
1334fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1335    let idx = global_id.x;
1336
1337    if (idx >= arrayLength(&chow_class_a)) {
1338        return;
1339    }
1340
1341    let dimension = geometry_params[0];
1342    let degree_a = geometry_params[1];
1343    let degree_b = geometry_params[2];
1344
1345    // Intersection product in Chow ring: A · B
1346    // For simplicity, implement as pointwise multiplication for this example
1347    let a = chow_class_a[idx];
1348    let b = chow_class_b[idx];
1349
1350    // Check degree compatibility (degree_a + degree_b ≤ dimension)
1351    if (degree_a + degree_b <= dimension) {
1352        intersection_result[idx] = multiply_rationals(a, b);
1353    } else {
1354        intersection_result[idx] = RationalNumber(0, 1); // Zero class
1355    }
1356}
1357"#;
1358
1359/// Schubert calculus computations
1360const SCHUBERT_CALCULUS: &str = r#"
1361@group(0) @binding(0) var<storage, read> partition_a: array<u32>;
1362@group(0) @binding(1) var<storage, read> partition_b: array<u32>;
1363@group(0) @binding(2) var<storage, read_write> littlewood_coeff: array<u32>;
1364@group(0) @binding(3) var<storage, read> grassmann_params: array<u32>; // [n, k]
1365
1366@compute @workgroup_size(128)
1367fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1368    let coeff_idx = global_id.x;
1369
1370    let n = grassmann_params[0]; // Dimension of ambient space
1371    let k = grassmann_params[1]; // Dimension of subspaces
1372
1373    if (coeff_idx >= arrayLength(&littlewood_coeff)) {
1374        return;
1375    }
1376
1377    // Schubert calculus: compute Littlewood-Richardson coefficients
1378    // This is a simplified version - full LR coefficients require more complex algorithms
1379
1380    let max_parts = min(arrayLength(&partition_a), arrayLength(&partition_b));
1381    var coefficient = 0u;
1382
1383    // Simplified intersection number computation
1384    for (var i = 0u; i < max_parts; i = i + 1u) {
1385        let part_a = partition_a[i];
1386        let part_b = partition_b[i];
1387
1388        // Check compatibility with Grassmannian Gr(k, n)
1389        if (part_a <= n - k && part_b <= n - k) {
1390            coefficient += part_a * part_b;
1391        }
1392    }
1393
1394    littlewood_coeff[coeff_idx] = coefficient;
1395}
1396"#;
1397
1398// =====================================================================
1399// TOPOLOGY SHADERS
1400// =====================================================================
1401
1402/// Topology shader collection
1403pub const TOPOLOGY_SHADERS: &[(&str, &str)] = &[
1404    ("topology_distance_matrix", TOPOLOGY_DISTANCE_MATRIX),
1405    ("topology_morse_critical", TOPOLOGY_MORSE_CRITICAL),
1406    ("topology_boundary_matrix", TOPOLOGY_BOUNDARY_MATRIX),
1407    ("topology_matrix_reduction", TOPOLOGY_MATRIX_REDUCTION),
1408];
1409
1410/// Distance matrix computation for Rips filtration
1411pub const TOPOLOGY_DISTANCE_MATRIX: &str = r#"
1412struct Point {
1413    x: f32,
1414    y: f32,
1415    z: f32,
1416    w: f32,
1417}
1418
1419@group(0) @binding(0)
1420var<storage, read> points: array<Point>;
1421
1422@group(0) @binding(1)
1423var<storage, read_write> distances: array<f32>;
1424
1425@compute @workgroup_size(8, 8)
1426fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1427    let i = global_id.x;
1428    let j = global_id.y;
1429    let num_points = arrayLength(&points);
1430
1431    if (i >= num_points || j >= num_points) {
1432        return;
1433    }
1434
1435    let idx = i * num_points + j;
1436
1437    if (i == j) {
1438        distances[idx] = 0.0;
1439        return;
1440    }
1441
1442    let pi = points[i];
1443    let pj = points[j];
1444
1445    let dx = pi.x - pj.x;
1446    let dy = pi.y - pj.y;
1447    let dz = pi.z - pj.z;
1448    let dw = pi.w - pj.w;
1449
1450    // Euclidean distance (supports up to 4D)
1451    distances[idx] = sqrt(dx * dx + dy * dy + dz * dz + dw * dw);
1452}
1453"#;
1454
1455/// Morse critical point detection on 2D height function grid
1456pub const TOPOLOGY_MORSE_CRITICAL: &str = r#"
1457struct CriticalPoint {
1458    x: u32,
1459    y: u32,
1460    critical_type: u32,  // 0=min, 1=saddle, 2=max
1461    value: f32,
1462}
1463
1464@group(0) @binding(0)
1465var<storage, read> values: array<f32>;
1466
1467@group(0) @binding(1)
1468var<uniform> dims: vec2<u32>;  // width, height
1469
1470@group(0) @binding(2)
1471var<storage, read_write> critical_points: array<CriticalPoint>;
1472
1473@group(0) @binding(3)
1474var<storage, read_write> counter: atomic<u32>;
1475
1476fn get_value(x: u32, y: u32) -> f32 {
1477    return values[y * dims.x + x];
1478}
1479
1480@compute @workgroup_size(16, 16)
1481fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1482    // Interior points only (offset by 1)
1483    let x = global_id.x + 1u;
1484    let y = global_id.y + 1u;
1485
1486    if (x >= dims.x - 1u || y >= dims.y - 1u) {
1487        return;
1488    }
1489
1490    let v = get_value(x, y);
1491
1492    // Get 8-neighbors
1493    let n0 = get_value(x - 1u, y - 1u);
1494    let n1 = get_value(x, y - 1u);
1495    let n2 = get_value(x + 1u, y - 1u);
1496    let n3 = get_value(x - 1u, y);
1497    let n4 = get_value(x + 1u, y);
1498    let n5 = get_value(x - 1u, y + 1u);
1499    let n6 = get_value(x, y + 1u);
1500    let n7 = get_value(x + 1u, y + 1u);
1501
1502    // Count neighbors lower/higher than center
1503    var lower_count = 0u;
1504    var upper_count = 0u;
1505
1506    if (n0 < v) { lower_count += 1u; } else if (n0 > v) { upper_count += 1u; }
1507    if (n1 < v) { lower_count += 1u; } else if (n1 > v) { upper_count += 1u; }
1508    if (n2 < v) { lower_count += 1u; } else if (n2 > v) { upper_count += 1u; }
1509    if (n3 < v) { lower_count += 1u; } else if (n3 > v) { upper_count += 1u; }
1510    if (n4 < v) { lower_count += 1u; } else if (n4 > v) { upper_count += 1u; }
1511    if (n5 < v) { lower_count += 1u; } else if (n5 > v) { upper_count += 1u; }
1512    if (n6 < v) { lower_count += 1u; } else if (n6 > v) { upper_count += 1u; }
1513    if (n7 < v) { lower_count += 1u; } else if (n7 > v) { upper_count += 1u; }
1514
1515    var critical_type = 3u;  // 3 = not critical
1516
1517    if (lower_count == 8u) {
1518        critical_type = 2u;  // Maximum
1519    } else if (upper_count == 8u) {
1520        critical_type = 0u;  // Minimum
1521    } else if (lower_count > 0u && upper_count > 0u) {
1522        // Check for saddle by counting sign changes around boundary
1523        var signs = array<bool, 8>(
1524            n0 > v, n1 > v, n2 > v, n3 > v, n4 > v, n5 > v, n6 > v, n7 > v
1525        );
1526
1527        var changes = 0u;
1528        if (signs[0] != signs[1]) { changes += 1u; }
1529        if (signs[1] != signs[2]) { changes += 1u; }
1530        if (signs[2] != signs[4]) { changes += 1u; }
1531        if (signs[4] != signs[7]) { changes += 1u; }
1532        if (signs[7] != signs[6]) { changes += 1u; }
1533        if (signs[6] != signs[5]) { changes += 1u; }
1534        if (signs[5] != signs[3]) { changes += 1u; }
1535        if (signs[3] != signs[0]) { changes += 1u; }
1536
1537        if (changes >= 4u) {
1538            critical_type = 1u;  // Saddle
1539        }
1540    }
1541
1542    if (critical_type < 3u) {
1543        let idx = atomicAdd(&counter, 1u);
1544        critical_points[idx] = CriticalPoint(x, y, critical_type, v);
1545    }
1546}
1547"#;
1548
1549/// Boundary matrix construction for simplicial complex (sparse format)
1550pub const TOPOLOGY_BOUNDARY_MATRIX: &str = r#"
1551struct Simplex {
1552    vertices: array<u32, 8>,  // Max 7-simplex
1553    dimension: u32,
1554    filtration_time: f32,
1555    padding: array<u32, 2>,
1556}
1557
1558struct MatrixEntry {
1559    row: u32,
1560    col: u32,
1561    value: i32,
1562    padding: u32,
1563}
1564
1565@group(0) @binding(0)
1566var<storage, read> simplices: array<Simplex>;
1567
1568@group(0) @binding(1)
1569var<storage, read_write> boundary_entries: array<MatrixEntry>;
1570
1571@group(0) @binding(2)
1572var<storage, read_write> entry_counter: atomic<u32>;
1573
1574@compute @workgroup_size(256)
1575fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1576    let simplex_idx = global_id.x;
1577    if (simplex_idx >= arrayLength(&simplices)) {
1578        return;
1579    }
1580
1581    let s = simplices[simplex_idx];
1582    if (s.dimension == 0u) {
1583        return;  // 0-simplices have no boundary
1584    }
1585
1586    // Generate boundary faces with alternating signs
1587    let dim = s.dimension;
1588    for (var i = 0u; i <= dim; i++) {
1589        let sign = select(-1, 1, i % 2u == 0u);
1590
1591        // Allocate entry atomically
1592        let entry_idx = atomicAdd(&entry_counter, 1u);
1593
1594        // Compute hash of face (for row index)
1595        var face_hash = 0u;
1596        for (var j = 0u; j <= dim; j++) {
1597            if (j != i) {
1598                face_hash = face_hash * 31u + s.vertices[j];
1599            }
1600        }
1601
1602        boundary_entries[entry_idx] = MatrixEntry(face_hash, simplex_idx, sign, 0u);
1603    }
1604}
1605"#;
1606
1607/// Parallel matrix reduction for homology computation
1608pub const TOPOLOGY_MATRIX_REDUCTION: &str = r#"
1609// Parallel column reduction using GPU
1610// Finds pivot rows for each column
1611
1612@group(0) @binding(0)
1613var<storage, read_write> matrix: array<i32>;
1614
1615@group(0) @binding(1)
1616var<uniform> dims: vec2<u32>;  // rows, cols
1617
1618@group(0) @binding(2)
1619var<storage, read_write> pivots: array<u32>;
1620
1621@compute @workgroup_size(256)
1622fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1623    let col = global_id.x;
1624    let rows = dims.x;
1625    let cols = dims.y;
1626
1627    if (col >= cols) {
1628        return;
1629    }
1630
1631    // Find lowest non-zero in column (pivot row)
1632    var pivot_row = rows;  // rows means no pivot
1633    for (var row = 0u; row < rows; row++) {
1634        let idx = row * cols + col;
1635        if (matrix[idx] != 0) {
1636            pivot_row = row;
1637        }
1638    }
1639
1640    pivots[col] = pivot_row;
1641}
1642"#;
1643
1644#[cfg(test)]
1645mod tests {
1646    use super::*;
1647
1648    #[test]
1649    fn test_shader_library_creation() {
1650        let library = ShaderLibrary::new();
1651        let shaders = library.list_shaders();
1652
1653        // Should have shaders for all mathematical domains
1654        assert!(shaders.contains(&"tropical_matrix_multiply".to_string()));
1655        assert!(shaders.contains(&"dual_forward_ad".to_string()));
1656        assert!(shaders.contains(&"tropical_dual_clifford".to_string()));
1657        assert!(shaders.contains(&"fisher_information".to_string()));
1658        assert!(shaders.contains(&"ca_evolution".to_string()));
1659        assert!(shaders.contains(&"intersection_theory".to_string()));
1660
1661        // Holographic memory shaders
1662        assert!(shaders.contains(&"holographic_batch_bind".to_string()));
1663        assert!(shaders.contains(&"holographic_batch_similarity".to_string()));
1664        assert!(shaders.contains(&"holographic_bundle_all".to_string()));
1665        assert!(shaders.contains(&"holographic_resonator_step".to_string()));
1666    }
1667
1668    #[test]
1669    fn test_shader_retrieval() {
1670        let library = ShaderLibrary::new();
1671
1672        let shader = library.get_shader("tropical_matrix_multiply");
1673        assert!(shader.is_some());
1674        assert!(shader.unwrap().contains("@compute"));
1675        assert!(shader.unwrap().contains("tropical"));
1676    }
1677
1678    #[test]
1679    fn test_shader_constants() {
1680        assert_eq!(TROPICAL_SHADERS.len(), 3);
1681        assert_eq!(DUAL_SHADERS.len(), 3);
1682        assert_eq!(FUSION_SHADERS.len(), 2);
1683        assert_eq!(HOLOGRAPHIC_SHADERS.len(), 4);
1684    }
1685
1686    #[test]
1687    fn test_holographic_shaders() {
1688        // Verify holographic shaders contain expected WGSL patterns
1689        assert!(HOLOGRAPHIC_BATCH_BIND.contains("@compute"));
1690        assert!(HOLOGRAPHIC_BATCH_BIND.contains("cayley"));
1691
1692        assert!(HOLOGRAPHIC_BATCH_SIMILARITY.contains("@compute"));
1693        assert!(HOLOGRAPHIC_BATCH_SIMILARITY.contains("similarity"));
1694
1695        assert!(HOLOGRAPHIC_BUNDLE_ALL.contains("@compute"));
1696        assert!(HOLOGRAPHIC_BUNDLE_ALL.contains("workgroupBarrier"));
1697
1698        assert!(HOLOGRAPHIC_RESONATOR_STEP.contains("@compute"));
1699        assert!(HOLOGRAPHIC_RESONATOR_STEP.contains("codebook"));
1700    }
1701}
amari_gpu/shaders.rs

amari_gpu/
shaders.rs