pub mod adaptive;
pub mod relativistic;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
use bytemuck::{Pod, Zeroable};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
use thiserror::Error;
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

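/// Errors surfaced by GPU initialization, buffer handling, and shader compilation.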
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

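/// GPU-accelerated Clifford algebra executor backed by a wgpu compute pipeline.
///
/// The Cayley table for the signature `(P, Q, R)` is generated on the CPU, uploaded
/// once at construction, and reused for every batched geometric product.
///
/// Minimal usage sketch (assumes an async runtime such as `tokio` or `pollster`;
/// inputs are flattened, `basis_count` coefficients per multivector):
///
/// ```ignore
/// let gpu = GpuCliffordAlgebra::new::<3, 0, 0>().await?;
/// let products = gpu.batch_geometric_product(&a_flat, &b_flat).await?;
/// ```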
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
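    /// Requests a high-performance adapter and device, uploads the Cayley table,
    /// and compiles the geometric-product compute pipeline.
    ///
    /// Returns `GpuError::InitializationError` if no adapter or device is available.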
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

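    /// Flattens the CPU-side Cayley table into a row-major `basis_count * basis_count`
    /// array of `(sign, index)` entries, matching the layout read by the WGSL shader.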
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

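    /// Computes geometric products for a batch of multivector pairs on the GPU.
    ///
    /// Inputs are flattened with `basis_count` coefficients per multivector; the two
    /// batches must have equal length. Coefficients are converted to `f32` for the GPU,
    /// so results carry single-precision accuracy.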
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

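    /// Heuristic dispatch threshold: below roughly 100 operations the cost of GPU
    /// setup and data transfer tends to outweigh the benefit, so callers should stay
    /// on the CPU path.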
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

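/// One Cayley-table entry as seen by the shader: the sign and target basis index of
/// the product of two basis blades. `#[repr(C)]` plus `Pod`/`Zeroable` keep the layout
/// byte-compatible with the WGSL `CayleyEntry` struct.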
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

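/// WGSL kernel for the batched geometric product. Each invocation handles one pair of
/// multivectors and accumulates `sign * a_i * b_j` into the output slot given by the
/// Cayley table. Note that `BASIS_COUNT` is hard-coded to 8, so this shader is only
/// valid for 3-dimensional algebras even though the host-side API is generic.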
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

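/// Adaptive dispatcher that uses the GPU when it is available and the batch is large
/// enough, and otherwise falls back to the CPU implementation from `amari_core`.
///
/// Minimal usage sketch (assumes an async runtime; the batch layout is 8 coefficients
/// per 3D multivector, matching the GPU path):
///
/// ```ignore
/// let compute = AdaptiveCompute::new::<3, 0, 0>().await;
/// let products = compute.batch_geometric_product(&a_flat, &b_flat).await?;
/// ```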
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // A single product is not worth the GPU dispatch overhead; compute on the CPU.
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // Each 3D multivector contributes 8 coefficients to the flattened batch.
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback: unpack each pair, multiply with amari_core, and re-flatten.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

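/// GPU context for information-geometry primitives (Amari-Chentsov tensor, Fisher
/// information, Bregman divergences), with progressive adapter fallback so it can
/// still initialize on low-power or software-rendered devices.
///
/// Minimal usage sketch (assumes an async runtime):
///
/// ```ignore
/// let ig = GpuInfoGeometry::new().await?;
/// let values = ig.amari_chentsov_tensor_batch(&xs, &ys, &zs).await?;
/// ```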
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
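    /// Creates a context by trying adapters in order of preference: high-performance,
    /// then low-power, then a forced fallback (software) adapter. Fails only if none
    /// of the three is available.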
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

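    /// Creates a context for an explicit device preference: `"high-performance"`,
    /// `"low-power"`, or `"fallback"` (forces the software adapter). Any other string
    /// is rejected with an `InitializationError`.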
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

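    /// Evaluates the Amari-Chentsov tensor for a single triple of multivectors by
    /// delegating to the CPU implementation in `amari_info_geom`.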
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

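    /// Evaluates the Amari-Chentsov tensor for a batch of triples.
    ///
    /// Batches smaller than 100 are computed on the CPU to avoid dispatch overhead.
    /// Larger batches currently also use the CPU path; the GPU kernel exists in
    /// `compute_tensor_batch_gpu` but is not wired in here yet.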
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: CPU evaluation avoids GPU dispatch overhead.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: currently also evaluated on the CPU; the GPU kernel lives in
        // `compute_tensor_batch_gpu` and is not wired in yet.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

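    /// Convenience entry point for flat typed-array input (e.g. from WASM/JS callers):
    /// `flat_data` holds 9 values per triple, laid out as
    /// `[x0, x1, x2, y0, y1, y2, z0, z1, z2]`, and must have length `batch_size * 9`.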
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

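    /// Reports basic information about the active device. Currently returns a static
    /// descriptor and does not query adapter details.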
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    /// Returns an estimate of GPU memory usage. Currently a fixed placeholder (1 MiB).
    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        Ok(1024 * 1024)
    }

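    /// Computes a Fisher information matrix for the given parameters. Currently a
    /// placeholder that returns the 2x2 identity regardless of input.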
    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

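    /// Computes a batch of divergences on the CPU using the KL-divergence form
    /// `sum_i p_i * ln(p_i / q_i)`, skipping entries where either component is
    /// non-positive.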
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

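    /// GPU path for batched Amari-Chentsov tensor evaluation: packs the vector parts
    /// of each multivector as `vec3<f32>`, dispatches `TENSOR_COMPUTE_SHADER` (one
    /// scalar triple product per element), and reads the results back as `f64`.
    /// Currently unused by the public batch API.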
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64, // one f32 result per triple
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // One workgroup covers 64 elements, matching @workgroup_size(64) in the shader.
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

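    /// Builds the scalar-triple-product compute pipeline from `TENSOR_COMPUTE_SHADER`,
    /// letting wgpu infer the bind group layout from the shader (`layout: None`).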
    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor pipeline.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor pipeline.
        Self::create_tensor_pipeline(device)
    }
}

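/// Lightweight description of the compute device backing a `GpuInfoGeometry` context.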
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

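/// Fisher information matrix wrapper. `eigenvalues` currently returns the diagonal
/// entries, which is exact only for diagonal matrices (such as the identity
/// placeholder produced by `fisher_information_matrix`).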
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

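/// WGSL kernel used by `compute_tensor_batch_gpu`: each invocation evaluates the
/// scalar triple product x · (y × z) for one triple in the batch; workgroups are
/// 64 invocations wide.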
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

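    // Sketch of a CPU-fallback check for AdaptiveCompute. It assumes amari_core's
    // scalar semantics (the product of the scalars 2 and 3 is 6 in slot 0) and that a
    // batch of one pair always takes the CPU path, since 1 is below the GPU threshold.
    #[tokio::test]
    async fn test_adaptive_compute_cpu_fallback() {
        let compute = AdaptiveCompute::new::<3, 0, 0>().await;

        // One multivector pair: scalar 2.0 times scalar 3.0.
        let mut a = vec![0.0; 8];
        let mut b = vec![0.0; 8];
        a[0] = 2.0;
        b[0] = 3.0;

        let result = compute
            .batch_geometric_product(&a, &b)
            .await
            .expect("CPU fallback should not fail");

        assert_eq!(result.len(), 8);
        assert!((result[0] - 6.0).abs() < 1e-12);
    }
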
    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip when running headless or in CI, where adapter discovery is unreliable.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}