pub mod adaptive;
pub mod benchmarks;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

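/// Errors that can occur while initializing or using the GPU backend.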
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

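/// GPU-accelerated Clifford algebra executor: owns the wgpu device and queue,
/// the geometric-product compute pipeline, and the Cayley table uploaded as a
/// storage buffer.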
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
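    /// Creates a GPU context for the Clifford algebra Cl(P,Q,R): requests a
    /// high-performance adapter, uploads the precomputed Cayley table, and
    /// compiles the geometric-product compute pipeline.
    ///
    /// Minimal usage sketch (illustrative; assumes this crate is built as
    /// `amari_gpu` and that a GPU adapter is available):
    ///
    /// ```no_run
    /// # async fn demo() -> Result<(), amari_gpu::GpuError> {
    /// // Cl(3,0,0): the 3D Euclidean algebra with 8 basis blades.
    /// let gpu = amari_gpu::GpuCliffordAlgebra::new::<3, 0, 0>().await?;
    /// # Ok(())
    /// # }
    /// ```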
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        // Precompute the Cayley table on the CPU and upload it once.
        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        // Compile the WGSL shader for the geometric product.
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        // Bindings 0-2 are read-only inputs (Cayley table, operands A and B);
        // binding 3 is the writable output buffer.
        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

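    /// Flattens the CPU-side Cayley table into `basis_count * basis_count`
    /// `(sign, index)` entries in row-major order, ready for GPU upload.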
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

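    /// Computes geometric products for a batch of multivector pairs on the
    /// GPU. Inputs are flattened coefficient arrays with one
    /// `basis_count`-sized block per multivector; the output uses the same
    /// layout. Values are converted to `f32` for the shader, so results have
    /// single-precision accuracy.
    ///
    /// Illustrative call (hypothetical sizes; `gpu` built for Cl(3,0,0)):
    ///
    /// ```no_run
    /// # async fn demo(gpu: &amari_gpu::GpuCliffordAlgebra) -> Result<(), amari_gpu::GpuError> {
    /// let a = vec![0.0f64; 8 * 128]; // 128 multivectors, 8 coefficients each
    /// let b = vec![0.0f64; 8 * 128];
    /// let products = gpu.batch_geometric_product(&a, &b).await?;
    /// assert_eq!(products.len(), 8 * 128);
    /// # Ok(())
    /// # }
    /// ```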
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        // Convert f64 -> f32; the compute shader works in single precision.
        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        // Map the staging buffer and block until the GPU has finished.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

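    /// Heuristic for GPU dispatch: below roughly 100 operations, buffer
    /// creation and readback overhead tends to outweigh any GPU speedup.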
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

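/// One entry of the flattened Cayley table: the sign and target basis-blade
/// index of a basis-blade product. `#[repr(C)]` matches the WGSL struct layout.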
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

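/// WGSL compute shader for batched geometric products. `BASIS_COUNT` is
/// hard-coded to 8 (a 3-dimensional algebra), so this pipeline is only
/// correct for `GpuCliffordAlgebra` instances with P + Q + R = 3.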
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

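/// Compute frontend that transparently falls back to the CPU when no GPU is
/// available or when a workload is too small to amortize GPU dispatch.
///
/// Minimal sketch (illustrative; assumes this crate is built as `amari_gpu`):
///
/// ```no_run
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// let compute = amari_gpu::AdaptiveCompute::new::<3, 0, 0>().await;
/// let a = vec![1.0f64; 8];
/// let b = vec![1.0f64; 8];
/// // A single pair stays on the CPU; large batches use the GPU if present.
/// let product = compute.batch_geometric_product(&a, &b).await?;
/// # Ok(())
/// # }
/// ```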
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        // If GPU initialization fails, fall back silently to CPU-only mode.
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // A single product is far below the GPU dispatch threshold, so it is
        // always computed on the CPU.
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // Eight coefficients per multivector (3D algebra).
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback: compute each product with amari-core.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

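/// GPU-accelerated information-geometry operations (Amari-Chentsov tensor,
/// Fisher information, Bregman divergence) built on wgpu compute pipelines.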
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
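    /// Creates the info-geometry GPU context, trying adapters in order of
    /// preference: high-performance, then low-power, then a forced software
    /// fallback.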
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

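    /// Like [`Self::new`], but selects the adapter from an explicit
    /// preference string: `"high-performance"`, `"low-power"`, or
    /// `"fallback"`. Any other value is an initialization error.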
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

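    /// Computes the Amari-Chentsov tensor for a single triple of
    /// multivectors. One evaluation is always done on the CPU; the GPU only
    /// pays off for batches (see [`Self::amari_chentsov_tensor_batch`]).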
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

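    /// Computes the Amari-Chentsov tensor for a batch of multivector triples.
    ///
    /// Batches of fewer than 100 triples are evaluated directly on the CPU;
    /// larger batches currently take the same CPU path, since the GPU kernel
    /// in `compute_tensor_batch_gpu` is not yet wired into this dispatch.
    ///
    /// Illustrative call (zero vectors, so every tensor value is 0.0):
    ///
    /// ```no_run
    /// # use amari_core::Multivector;
    /// # async fn demo(gpu: &amari_gpu::GpuInfoGeometry) -> Result<(), amari_gpu::GpuError> {
    /// let xs: Vec<Multivector<3, 0, 0>> = (0..256).map(|_| Multivector::zero()).collect();
    /// let tensors = gpu.amari_chentsov_tensor_batch(&xs, &xs, &xs).await?;
    /// assert_eq!(tensors.len(), 256);
    /// # Ok(())
    /// # }
    /// ```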
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: GPU setup cost dominates, so compute on the CPU.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: the GPU kernel (`compute_tensor_batch_gpu`) is not
        // yet enabled, so fall back to the same CPU computation for now.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

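    /// Computes a batch of Amari-Chentsov tensors from a flat `f64` array,
    /// the natural entry point when data arrives as a flat typed array
    /// (e.g. from a WASM caller).
    ///
    /// The layout is nine values per triple,
    /// `[x0, x1, x2, y0, y1, y2, z0, z1, z2]`, so `flat_data.len()` must
    /// equal `batch_size * 9`.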
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        // Placeholder: reports a generic WebGPU device rather than querying
        // the adapter for its actual name and backend.
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        // Placeholder estimate (1 MiB); wgpu does not expose real usage here.
        Ok(1024 * 1024)
    }

    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        // Placeholder: currently returns a 2x2 identity matrix.
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        // CPU implementation of the KL-style sum p_i * ln(p_i / q_i),
        // skipping non-positive entries.
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

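    /// GPU implementation of the batched Amari-Chentsov tensor: packs the
    /// vector parts of each triple into storage buffers, dispatches the
    /// scalar-triple-product shader, and reads results back through a
    /// staging buffer. Currently dead code; see
    /// [`Self::amari_chentsov_tensor_batch`] for the active dispatch.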
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Pack the vector part of each multivector, padded to four floats:
        // `array<vec3<f32>>` in a WGSL storage buffer has a 16-byte element
        // stride, so a tightly packed 12-byte layout would be misread.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        // One f32 result per triple.
        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // 64 threads per workgroup, matching @workgroup_size(64) in WGSL.
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

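    /// Builds the compute pipeline for the scalar-triple-product shader,
    /// letting wgpu derive the bind group layout from the WGSL source.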
    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: reuses the tensor pipeline until a dedicated Fisher
        // information shader exists.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: reuses the tensor pipeline until a dedicated
        // divergence shader exists.
        Self::create_tensor_pipeline(device)
    }
}

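/// Basic description of the device backing a [`GpuInfoGeometry`] context.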
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

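/// Fisher information matrix returned by
/// [`GpuInfoGeometry::fisher_information_matrix`].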
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        // Placeholder: returns the diagonal, which equals the eigenvalues
        // only for diagonal matrices (such as the identity produced by
        // `fisher_information_matrix` above).
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

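/// WGSL shader computing the Amari-Chentsov tensor as the scalar triple
/// product x · (y × z), one invocation per triple.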
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip in CI/headless environments, where no GPU or display exists.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        // Initialization may legitimately fail on machines without a GPU;
        // only unexpected error kinds should fail the test.
        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}