pub mod adaptive;
pub mod network;
pub mod performance;
pub mod relativistic;
pub mod shaders;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
use bytemuck::{Pod, Zeroable};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

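/// Errors that can occur while initializing the GPU or running GPU compute passes.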
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

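/// GPU-accelerated Clifford algebra context: owns the wgpu device and queue, the
/// compiled geometric-product pipeline, and the uploaded Cayley table.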
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
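    /// Initialize a GPU context for the Clifford algebra with signature (P, Q, R):
    /// requests a high-performance adapter, uploads the Cayley table, and compiles
    /// the geometric-product compute pipeline.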
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

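    /// Flatten the CPU-side Cayley table into a row-major array of (sign, index)
    /// entries, one per pair of basis blades, for upload to the GPU.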
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

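    /// Compute geometric products for a batch of multivectors on the GPU.
    /// Inputs are flat coefficient arrays with `basis_count` values per multivector;
    /// coefficients are converted to f32 for the shader and back to f64 on readback.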
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        // Map the staging buffer and wait for the GPU to finish before reading back.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

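    /// Heuristic for dispatch: batches of at least 100 operations go to the GPU;
    /// smaller workloads stay on the CPU.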
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

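/// One entry of the flattened Cayley table: the sign of the product of two basis
/// blades and the index of the resulting basis blade.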
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

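/// WGSL compute shader for batched geometric products. Note that `BASIS_COUNT` is
/// fixed at 8, so this kernel currently assumes a 3-dimensional algebra (2^3 basis blades).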
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

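/// Adaptive dispatcher: uses the GPU backend when it is available and the batch is
/// large enough, and falls back to CPU computation otherwise.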
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
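    /// Try to initialize the GPU backend; if no adapter is available, continue in
    /// CPU-only mode instead of returning an error.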
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

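    /// Single geometric product. This always runs on the CPU: one product cannot
    /// amortize the cost of GPU buffer transfers.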
    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        a.geometric_product(b)
    }

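    /// Batched geometric product for 3D multivectors (8 coefficients per element).
    /// Large batches are dispatched to the GPU when available; otherwise each product
    /// is computed on the CPU.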
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

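/// GPU-accelerated information geometry operations (Amari-Chentsov tensor, Fisher
/// information, Bregman divergences) with CPU fallbacks for small workloads.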
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
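    /// Initialize a GPU context, trying a high-performance adapter first, then a
    /// low-power adapter, and finally a software fallback adapter.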
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

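    /// Initialize with an explicit adapter preference: "high-performance",
    /// "low-power", or "fallback".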
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

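    /// Amari-Chentsov tensor for a single triple of multivectors. A single evaluation
    /// is always delegated to the CPU implementation.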
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

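    /// Batched Amari-Chentsov tensor evaluation. Batches smaller than 100 are computed
    /// on the CPU; larger batches currently also use the CPU path (the GPU kernel in
    /// `compute_tensor_batch_gpu` is not yet wired in here).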
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

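    /// Evaluate the tensor for a batch packed as a flat array of 9 values per element:
    /// [x0, x1, x2, y0, y1, y2, z0, z1, z2].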
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

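    /// Basic device information. Currently returns a static descriptor rather than
    /// querying the adapter.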
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

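    /// Approximate GPU memory usage in bytes. Currently a fixed 1 MiB placeholder.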
    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        Ok(1024 * 1024)
    }

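    /// Fisher information matrix for the given parameters. Currently returns a
    /// 2x2 identity placeholder.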
    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

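    /// Batched Bregman divergence, computed on the CPU as sum_i p_i * ln(p_i / q_i),
    /// i.e. the KL divergence (the Bregman divergence generated by negative entropy on
    /// the probability simplex); terms with non-positive entries are skipped.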
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

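    /// GPU kernel path for batched Amari-Chentsov tensors (scalar triple products).
    /// Currently not called by the public batch API, hence the `dead_code` allowance.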
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Pack each vector as four f32 values (xyz plus one padding float): in WGSL,
        // `array<vec3<f32>>` storage buffers use a 16-byte element stride, so tightly
        // packed 12-byte vectors would be misread on the GPU side.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

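    /// Compile the compute pipeline for the scalar-triple-product tensor shader,
    /// letting wgpu derive the bind group layout from the shader.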
    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

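    /// Placeholder: currently reuses the tensor compute pipeline for Fisher information.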
    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }

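    /// Placeholder: currently reuses the tensor compute pipeline for divergence computations.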
    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }
}

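/// Lightweight device descriptor returned by `GpuInfoGeometry::device_info`.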
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

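/// Fisher information matrix wrapper with a simple eigenvalue helper.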
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

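    /// Eigenvalues read off the diagonal; this is exact for the diagonal placeholder
    /// matrices returned by `fisher_information_matrix`.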
    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

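/// WGSL shader that evaluates the Amari-Chentsov tensor for vector arguments as the
/// scalar triple product x · (y × z), one batch element per invocation.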
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}