pub mod adaptive;
#[cfg(feature = "automata")]
pub mod automata;
pub mod benchmarks;
#[cfg(feature = "calculus")]
pub mod calculus;
#[cfg(feature = "dual")]
pub mod dual;
#[cfg(feature = "enumerative")]
pub mod enumerative;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "calculus")]
pub use calculus::GpuCalculus;
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

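/// Errors that can arise while initializing the GPU, managing buffers, or compiling shaders.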
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

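/// GPU-accelerated Clifford algebra for the signature determined by the const
/// generics `P`, `Q`, `R`.
///
/// Holds the wgpu device and queue, a compute pipeline for the geometric
/// product, and a precomputed Cayley table uploaded once as a storage buffer.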
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
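    /// Creates a GPU context for the algebra: requests a high-performance adapter
    /// and device, uploads the Cayley table, and builds the geometric product
    /// compute pipeline. Fails with `GpuError::InitializationError` when no
    /// adapter or device is available (e.g. in headless CI).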
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        // Request a high-performance adapter; error out if none is available.
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        // Upload the precomputed Cayley table as a read-only storage buffer.
        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

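    /// Flattens the `amari_core` Cayley table into a row-major
    /// `basis_count x basis_count` array of `(sign, index)` entries so the WGSL
    /// shader can index it directly.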
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

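    /// Computes geometric products for a batch of multivectors on the GPU.
    ///
    /// Both inputs are flat coefficient arrays of length `batch_size * basis_count`
    /// (multivector-major layout); the result uses the same layout. Coefficients
    /// are converted to `f32` for the shader and back to `f64` on readback.
    ///
    /// A minimal usage sketch (illustrative; assumes a 3D Euclidean algebra):
    ///
    /// ```ignore
    /// let gpu = GpuCliffordAlgebra::new::<3, 0, 0>().await?;
    /// // Two multivectors per operand, 8 coefficients each.
    /// let a = vec![0.0_f64; 2 * 8];
    /// let b = vec![0.0_f64; 2 * 8];
    /// let products = gpu.batch_geometric_product(&a, &b).await?;
    /// assert_eq!(products.len(), 2 * 8);
    /// ```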
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        // Convert f64 inputs to f32 for the GPU.
        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        // Map the staging buffer and block until the GPU work completes.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

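    /// Heuristic dispatch threshold: GPU setup and transfer overhead only pays off
    /// once the batch reaches roughly one hundred operations.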
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

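/// One entry of the flattened Cayley table: the sign and target basis-blade index
/// of a product of two basis blades. `#[repr(C)]` keeps the layout stable for the GPU.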
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

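/// WGSL compute shader for the batched geometric product. Each invocation handles
/// one multivector pair, accumulating `sign * a_i * b_j` into the output blade given
/// by the Cayley table. The hard-coded `BASIS_COUNT` of 8 ties this shader to
/// 3D Clifford algebras.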
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

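/// Dispatcher that prefers the GPU when it is available and the batch is large
/// enough, and otherwise falls back to the CPU implementation in `amari_core`.
///
/// A minimal usage sketch (illustrative):
///
/// ```ignore
/// let compute = AdaptiveCompute::new::<3, 0, 0>().await;
/// let products = compute.batch_geometric_product(&a_flat, &b_flat).await?;
/// ```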
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        // If GPU initialization fails (e.g. no adapter), keep `None` and use the CPU path.
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

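    /// Computes a single geometric product on the CPU; one product is too small
    /// to amortize a GPU dispatch.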
    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        a.geometric_product(b)
    }

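    /// Batched geometric products for 3D multivectors (8 coefficients each),
    /// dispatched to the GPU when one is available and the batch is large enough,
    /// otherwise computed on the CPU.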
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // Each 3D multivector has 8 coefficients.
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

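/// GPU-accelerated information geometry operations (Amari-Chentsov tensor, Fisher
/// information, Bregman divergences) built on wgpu.
///
/// A minimal usage sketch (illustrative):
///
/// ```ignore
/// let info_geom = GpuInfoGeometry::new().await?;
/// let value = info_geom.amari_chentsov_tensor(&x, &y, &z).await?;
/// ```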
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
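    /// Creates the GPU context, trying adapters in order of preference:
    /// high-performance, then low-power, then a forced fallback adapter.
    /// Returns `GpuError::InitializationError` if none is available.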
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        // Try adapters in order: high-performance, low-power, then forced fallback.
        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

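    /// Creates the GPU context with an explicit adapter preference:
    /// `"high-performance"`, `"low-power"`, or `"fallback"`. Any other string
    /// yields `GpuError::InitializationError`.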
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

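    /// Evaluates the Amari-Chentsov tensor for a single triple of multivectors.
    /// A single evaluation is not worth a GPU round-trip, so this delegates to
    /// the CPU implementation in `amari_info_geom`.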
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

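    /// Evaluates the Amari-Chentsov tensor for a batch of multivector triples.
    /// Batches smaller than 100 are computed directly on the CPU; larger batches
    /// currently also use the CPU path until the GPU pipeline is wired in.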
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: CPU is faster than paying the GPU transfer overhead.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: currently also computed on the CPU
        // (see `compute_tensor_batch_gpu` for the unused GPU path).
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

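    /// Evaluates a batch of Amari-Chentsov tensors from a flat `f64` array, as
    /// typically handed over from JavaScript typed arrays.
    ///
    /// The layout is nine values per triple, `[x0, x1, x2, y0, y1, y2, z0, z1, z2]`,
    /// so `flat_data.len()` must equal `batch_size * 9`.
    ///
    /// A minimal layout sketch (illustrative):
    ///
    /// ```ignore
    /// // One triple: x = e1, y = e2, z = e3.
    /// let flat = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
    /// let results = info_geom
    ///     .amari_chentsov_tensor_from_typed_arrays(&flat, 1)
    ///     .await?;
    /// ```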
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        // Unpack the flat [x0, x1, x2, y0, y1, y2, z0, z1, z2] layout into multivectors.
        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        // Placeholder estimate; per-device allocation statistics are not tracked here.
        Ok(1024 * 1024)
    }

    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        // Placeholder: returns a 2x2 identity matrix.
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        // Computes sum_i p_i * ln(p_i / q_i) per pair (KL-style divergence) on the CPU.
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

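    /// GPU path for batched Amari-Chentsov tensor evaluation: packs the vector
    /// parts into storage buffers, runs the tensor compute shader, and reads the
    /// results back through a staging buffer. Currently unused (`dead_code`) while
    /// the public batch API stays on the CPU. Note (reviewer observation): the WGSL
    /// inputs are declared as `array<vec3<f32>>`, which has a 16-byte element
    /// stride, while the host packs tight 12-byte triples, so the packing would
    /// likely need padding before this path is enabled.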
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Extract the vector components as f32 triples for the shader.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64, // one f32 result per triple
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // One thread per triple, 64 threads per workgroup (matches the shader).
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        // Read back the results through the staging buffer.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor pipeline.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor pipeline.
        Self::create_tensor_pipeline(device)
    }
}

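/// Summary of the GPU device backing a `GpuInfoGeometry` instance.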
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

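/// Fisher information matrix returned by `GpuInfoGeometry::fisher_information_matrix`.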
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        // Simplified: returns the diagonal entries, which equal the eigenvalues
        // only when the matrix is diagonal.
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

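/// WGSL compute shader for the batched tensor evaluation: each invocation computes
/// the scalar triple product x . (y x z) for one triple of 3D vectors.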
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip in CI and headless environments where no GPU or display is available.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                // Acceptable: the machine may simply have no usable GPU adapter.
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}