pub mod adaptive;
pub mod network;
pub mod relativistic;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
use bytemuck::{Pod, Zeroable};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
use thiserror::Error;
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}
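/// GPU-accelerated Clifford algebra executor. Holds the wgpu device and queue,
/// a compute pipeline for the geometric product, and the flattened Cayley table
/// uploaded once at construction time.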
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}
impl GpuCliffordAlgebra {
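    /// Creates a GPU execution context for the Clifford algebra Cl(P,Q,R).
    ///
    /// Requests a high-performance adapter, uploads the flattened Cayley table,
    /// and compiles the WGSL geometric-product pipeline. Fails with
    /// [`GpuError::InitializationError`] when no adapter or device is available.
    ///
    /// Illustrative sketch (not compiled here; assumes the crate is built as
    /// `amari_gpu` and that a WebGPU-capable adapter exists at runtime):
    ///
    /// ```ignore
    /// // Cl(3,0,0): 8 basis blades, so 8 coefficients per multivector.
    /// let gpu = amari_gpu::GpuCliffordAlgebra::new::<3, 0, 0>().await?;
    /// let a = vec![1.0_f64; 8];
    /// let b = vec![0.5_f64; 8];
    /// let product = gpu.batch_geometric_product(&a, &b).await?;
    /// assert_eq!(product.len(), 8);
    /// ```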
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }
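    /// Flattens the compile-time Cayley table of Cl(P,Q,R) into a row-major
    /// array of (sign, index) entries suitable for upload as a GPU storage buffer.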
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }
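    /// Computes geometric products for a batch of multivector pairs on the GPU.
    ///
    /// `a_batch` and `b_batch` are flat coefficient arrays of equal length, laid
    /// out as consecutive blocks of `basis_count` coefficients per multivector.
    /// Inputs are converted to `f32` for the compute kernel, so results round-trip
    /// at single precision.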
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }
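    /// Heuristic for choosing the GPU path: batches below roughly 100 operations
    /// are typically faster on the CPU because buffer setup and readback dominate.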
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}
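/// One entry of the flattened Cayley table: the product of two basis blades is
/// `sign` times the blade at `index`. `#[repr(C)]` plus `Pod`/`Zeroable` make it
/// safe to upload directly as a GPU storage buffer.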
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}
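/// WGSL kernel for the batched geometric product: one invocation per multivector
/// pair, accumulating coefficients through the Cayley table. Hard-coded for an
/// 8-element basis (3D Clifford algebra).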
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;
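/// Adaptive dispatcher that prefers the GPU when an adapter is available and the
/// batch is large enough to amortize transfer overhead, and otherwise falls back
/// to the CPU implementation from `amari_core`.
///
/// Illustrative sketch (not compiled here; assumes the crate is built as
/// `amari_gpu` and runs inside an async runtime):
///
/// ```ignore
/// let compute = amari_gpu::AdaptiveCompute::new::<3, 0, 0>().await;
/// // Two multivectors per operand, 8 coefficients each (3D layout).
/// let a = vec![0.0_f64; 16];
/// let b = vec![0.0_f64; 16];
/// let products = compute.batch_geometric_product(&a, &b).await?;
/// assert_eq!(products.len(), 16);
/// ```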
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // Single products are computed directly on the CPU.
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // Coefficient layout assumes the 3D algebra: 8 coefficients per multivector.
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback for small batches or when no GPU adapter is available.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}
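/// GPU context for information-geometry primitives: the Amari-Chentsov tensor,
/// Fisher information matrices, and Bregman divergences. It holds wgpu compute
/// pipelines, but the public batch entry points currently evaluate on the CPU;
/// the GPU kernel lives in `compute_tensor_batch_gpu`.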
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}
impl GpuInfoGeometry {
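    /// Creates the info-geometry GPU context, trying a high-performance adapter
    /// first, then a low-power one, and finally a software fallback adapter.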
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }
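    /// Like [`new`](Self::new), but honours an explicit device preference:
    /// `"high-performance"`, `"low-power"`, or `"fallback"`.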
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }
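    /// Evaluates the Amari-Chentsov tensor for a single triple of multivectors.
    /// A single evaluation is delegated directly to the CPU implementation.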
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }
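    /// Evaluates the Amari-Chentsov tensor for a batch of multivector triples.
    /// Batches under 100 elements are computed on the CPU; larger batches
    /// currently take the same CPU path (see `compute_tensor_batch_gpu` for the
    /// not-yet-wired-in GPU kernel).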
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: CPU evaluation avoids GPU setup overhead.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: currently also evaluated on the CPU.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }
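    /// Evaluates the tensor from a flat `f64` array laid out as nine values per
    /// element: `[x0, x1, x2, y0, y1, y2, z0, z1, z2]`. This is convenient when
    /// data arrives from JS typed arrays. Returns a `BufferError` if the length
    /// is not `batch_size * 9`.
    ///
    /// Illustrative sketch (not compiled here; `info_geom` is assumed to be an
    /// already constructed `GpuInfoGeometry`):
    ///
    /// ```ignore
    /// // One triple: x = e1, y = e2, z = e3.
    /// let flat = vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
    /// let results = info_geom.amari_chentsov_tensor_from_typed_arrays(&flat, 1).await?;
    /// assert_eq!(results.len(), 1);
    /// ```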
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        // Placeholder: reports a generic WebGPU device without querying the adapter.
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        // Placeholder estimate (1 MiB).
        Ok(1024 * 1024)
    }

    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        // Placeholder: returns a 2x2 identity matrix regardless of the parameters.
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        // KL-style Bregman divergence on the CPU: sum_i p_i * ln(p_i / q_i).
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }
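    /// GPU driver for batched tensor evaluation: packs the vector part of each
    /// multivector into a padded `vec3<f32>` buffer, dispatches the tensor
    /// pipeline, and reads one `f32` result per element back through a staging
    /// buffer. Not yet called by the public batch entry point.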
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Pack each vector as x, y, z plus one f32 of padding so the host layout
        // matches the 16-byte element stride of `array<vec3<f32>>` in WGSL.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            // One f32 result (4 bytes) per batch element.
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // One thread per batch element; 64 matches the shader's @workgroup_size.
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }
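    /// Builds the compute pipeline for the WGSL scalar-triple-product kernel.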
    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor compute pipeline.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor compute pipeline.
        Self::create_tensor_pipeline(device)
    }
}
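/// Summary of the device backing the info-geometry computations.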
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}
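/// Fisher information matrix returned by [`GpuInfoGeometry::fisher_information_matrix`].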
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        // Returns the diagonal entries; these equal the eigenvalues only when the
        // matrix is diagonal (as in the current placeholder Fisher matrix).
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}
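/// WGSL kernel for batched tensor evaluation: one thread per element computes
/// the scalar triple product x · (y × z), which serves here as the
/// Amari-Chentsov tensor value for vector-grade inputs.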
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip when running in CI or on headless machines without a display/GPU.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}