//! GPU acceleration for Amari geometric algebra and information geometry,
//! built on wgpu compute shaders, with CPU fallbacks when no suitable
//! adapter is available.

pub mod adaptive;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
use bytemuck::{Pod, Zeroable};
use thiserror::Error;
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, StatisticalGpuVerifier, VerificationConfig,
    VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

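/// Errors that can occur during GPU initialization, buffer management, or
/// shader compilation.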
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

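/// GPU-accelerated Clifford algebra context: owns the wgpu device and queue,
/// the compiled geometric-product pipeline, and the uploaded Cayley table.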
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
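    /// Requests a high-performance adapter and device, uploads the Cayley
    /// table for the Cl(P,Q,R) algebra, and compiles the geometric-product
    /// compute pipeline.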
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        // Precompute the Cayley table on the CPU and upload it to GPU memory.
        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        // Compile the WGSL geometric-product kernel.
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

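    /// Flattens the 2^dim x 2^dim Cayley table into row-major (sign, index)
    /// entries suitable for a GPU storage buffer.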
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

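    /// Computes geometric products for a batch of multivector pairs on the GPU.
    ///
    /// `a_batch` and `b_batch` are flattened coefficient arrays with
    /// `basis_count` values per multivector; the result uses the same layout.
    /// Coefficients are converted to `f32` for the GPU, so results are
    /// single-precision.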
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }
        if a_batch.len() % self.basis_count != 0 {
            return Err(GpuError::BufferError(
                "Batch length must be a multiple of the basis count".to_string(),
            ));
        }
        let batch_size = a_batch.len() / self.basis_count;

        // Convert to f32 for the GPU.
        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        // Map the staging buffer and block until the GPU has finished.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

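    /// Heuristic: GPU dispatch only pays off once the batch is large enough to
    /// amortize buffer transfers; below this threshold the CPU path is
    /// typically faster.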
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

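/// One entry of the flattened Cayley table: the product of basis blades `i`
/// and `j` contributes `sign` to coefficient `index`. The layout matches the
/// WGSL struct of the same name.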
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

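/// WGSL kernel for batched geometric products; one invocation handles one
/// multivector pair. Note that `BASIS_COUNT` is hard-coded to 8, so this
/// kernel assumes a 3-dimensional algebra (8 basis blades).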
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

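/// Dispatcher that uses the GPU when one is available and the batch is large
/// enough, and falls back to the CPU implementation otherwise.
///
/// A minimal usage sketch (hypothetical: the `amari_gpu` crate path is
/// assumed, which is why the example is marked `ignore`):
///
/// ```ignore
/// use amari_gpu::{AdaptiveCompute, GpuError};
///
/// # async fn demo() -> Result<(), GpuError> {
/// // Two batches of 3D multivectors, 8 coefficients each, flattened.
/// let a = vec![0.0_f64; 8 * 128];
/// let b = vec![0.0_f64; 8 * 128];
///
/// let compute = AdaptiveCompute::new::<3, 0, 0>().await;
/// let products = compute.batch_geometric_product(&a, &b).await?;
/// assert_eq!(products.len(), a.len());
/// # Ok(())
/// # }
/// ```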
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        // If GPU initialization fails, fall back to CPU-only operation.
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // A single product is not worth a GPU round trip; compute it on the CPU.
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // 8 coefficients per 3D multivector.
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

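/// GPU context for information-geometry computations such as the
/// Amari-Chentsov tensor, Fisher information, and Bregman divergences.
/// Small batches are evaluated on the CPU.
///
/// A usage sketch (hypothetical: the `amari_gpu` crate path is assumed):
///
/// ```ignore
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// let geom = amari_gpu::GpuInfoGeometry::new().await?;
/// // One (x, y, z) triple, nine components in total.
/// let flat = vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
/// let values = geom.amari_chentsov_tensor_from_typed_arrays(&flat, 1).await?;
/// assert_eq!(values.len(), 1);
/// # Ok(())
/// # }
/// ```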
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
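    /// Initializes the GPU context, trying a high-performance adapter first,
    /// then a low-power adapter, then a software fallback adapter.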
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

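    /// Initializes the GPU context with an explicit adapter preference:
    /// `"high-performance"`, `"low-power"`, or `"fallback"`.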
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

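    /// Evaluates the Amari-Chentsov tensor for a single (x, y, z) triple.
    /// A single evaluation is cheaper on the CPU, so this delegates to
    /// `amari_info_geom::amari_chentsov_tensor`.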
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

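    /// Evaluates the Amari-Chentsov tensor for a batch of (x, y, z) triples.
    /// Batches smaller than 100 are computed on the CPU; larger batches
    /// currently also use the CPU path (see `compute_tensor_batch_gpu` for the
    /// GPU kernel).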
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: not worth the GPU round trip.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: currently also evaluated on the CPU.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

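    /// Evaluates the Amari-Chentsov tensor from a flat array of vector
    /// components, nine `f64` values per triple:
    /// `[x0, x1, x2, y0, y1, y2, z0, z1, z2, ...]`.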
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

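    /// Reports basic information about the active device. Currently returns a
    /// static descriptor rather than querying the adapter.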
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    /// Estimates GPU memory usage in bytes. Placeholder: returns a fixed 1 MiB.
    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        Ok(1024 * 1024)
    }

    /// Computes a Fisher information matrix for the given parameters.
    /// Placeholder: returns a 2x2 identity matrix.
    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    /// Computes KL-style Bregman divergences between paired distributions on
    /// the CPU: sum_i p_i * ln(p_i / q_i), skipping non-positive entries.
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

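    /// GPU kernel path for batched Amari-Chentsov tensor evaluation: uploads
    /// the vector parts of x, y, z, dispatches the scalar-triple-product
    /// shader, and reads back one `f32` per triple. Not yet wired into
    /// `amari_chentsov_tensor_batch`.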
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Extract the vector components (e1, e2, e3) of each multivector as f32,
        // padded to four floats because `array<vec3<f32>>` in a storage buffer
        // has a 16-byte element stride.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64, // one f32 per triple
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // Matches @workgroup_size(64) in the tensor shader.
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

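    /// Builds the compute pipeline for the scalar-triple-product tensor shader,
    /// letting wgpu derive the bind group layout from the shader.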
    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: reuses the tensor pipeline until a dedicated Fisher kernel exists.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: reuses the tensor pipeline until a dedicated divergence kernel exists.
        Self::create_tensor_pipeline(device)
    }
}

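/// Summary of the compute device backing a `GpuInfoGeometry` instance.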
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

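/// Fisher information matrix returned by `GpuInfoGeometry::fisher_information_matrix`.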
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        // Placeholder: returns the diagonal, which equals the eigenvalues only
        // for diagonal matrices.
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

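/// WGSL kernel that evaluates the Amari-Chentsov tensor for 3D vectors as the
/// scalar triple product x · (y × z), one triple per invocation.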
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip in headless CI environments where no GPU adapter is available.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}