pub mod adaptive;
#[cfg(feature = "automata")]
pub mod automata;
pub mod benchmarks;
#[cfg(feature = "calculus")]
pub mod calculus;
#[cfg(feature = "dual")]
pub mod dual;
#[cfg(feature = "enumerative")]
pub mod enumerative;
#[cfg(feature = "holographic")]
pub mod holographic;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
#[cfg(feature = "probabilistic")]
pub mod probabilistic;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "calculus")]
pub use calculus::GpuCalculus;
#[cfg(feature = "holographic")]
pub use holographic::{
    GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
};
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
#[cfg(feature = "probabilistic")]
pub use probabilistic::{GpuProbabilistic, GpuProbabilisticError, GpuProbabilisticResult};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

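/// GPU-accelerated Clifford algebra engine.
///
/// The signature `Cl(P, Q, R)` is fixed when calling [`GpuCliffordAlgebra::new`];
/// its Cayley table is uploaded once and reused for every batched geometric
/// product. A minimal usage sketch, assuming this crate is imported as
/// `amari_gpu` and an async runtime (e.g. tokio) drives the futures:
///
/// ```ignore
/// use amari_gpu::GpuCliffordAlgebra;
///
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// // 3D Euclidean algebra Cl(3,0,0): 8 basis coefficients per multivector.
/// let gpu = GpuCliffordAlgebra::new::<3, 0, 0>().await?;
///
/// // Two batches of 256 multivectors, flattened as 8 coefficients each.
/// let a = vec![0.0; 8 * 256];
/// let b = vec![0.0; 8 * 256];
/// let products = gpu.batch_geometric_product(&a, &b).await?;
/// assert_eq!(products.len(), 8 * 256);
/// # Ok(())
/// # }
/// ```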
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

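    /// Computes geometric products for a batch of multivector pairs on the GPU.
    ///
    /// Inputs are flat coefficient slices of equal length, `basis_count`
    /// coefficients per multivector, and the result uses the same layout.
    /// Coefficients are downcast to `f32` for the shader, so results carry
    /// single-precision accuracy even though the API uses `f64`.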
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

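    /// Heuristic for choosing between the GPU path and the CPU fallback.
    ///
    /// Below roughly 100 operations the fixed cost of buffer setup and readback
    /// typically outweighs any parallel speedup, so small batches stay on the CPU.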
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

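/// One entry of the flattened Cayley table: the sign and target basis index that
/// the product of two basis blades maps to. The layout (`f32` + `u32`, 8 bytes,
/// `#[repr(C)]`) must match the `CayleyEntry` struct declared in the WGSL shader below.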
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

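/// Dispatcher that uses the GPU when one is available and the batch is large
/// enough, and otherwise falls back to the CPU implementation from `amari_core`.
/// A minimal sketch, assuming this crate is imported as `amari_gpu` and an
/// async runtime drives the futures:
///
/// ```ignore
/// use amari_gpu::AdaptiveCompute;
///
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// let compute = AdaptiveCompute::new::<3, 0, 0>().await;
/// let a_flat = vec![1.0; 8 * 16]; // 16 multivectors, 8 coefficients each
/// let b_flat = vec![1.0; 8 * 16];
/// // Small batches run on the CPU; large ones use the GPU when present.
/// let products = compute.batch_geometric_product(&a_flat, &b_flat).await?;
/// # Ok(())
/// # }
/// ```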
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

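        // CPU fallback: the flat layout assumes Cl(3,0,0), i.e. 8 coefficients
        // per multivector, matching BASIS_COUNT in the WGSL shader above.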
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

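/// GPU context for information-geometry primitives (Amari-Chentsov tensor,
/// Fisher information, Bregman divergences). A minimal sketch, assuming this
/// crate is imported as `amari_gpu` and an async runtime drives the futures:
///
/// ```ignore
/// use amari_core::Multivector;
/// use amari_gpu::GpuInfoGeometry;
///
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// let geometry = GpuInfoGeometry::new().await?;
/// let x = vec![Multivector::<3, 0, 0>::zero()];
/// let y = vec![Multivector::<3, 0, 0>::zero()];
/// let z = vec![Multivector::<3, 0, 0>::zero()];
/// let values = geometry.amari_chentsov_tensor_batch(&x, &y, &z).await?;
/// assert_eq!(values.len(), 1);
/// # Ok(())
/// # }
/// ```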
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

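    /// Evaluates the Amari-Chentsov tensor for a batch of `(x, y, z)` triples.
    ///
    /// Batches smaller than 100 triples are evaluated on the CPU; at present the
    /// larger-batch path uses the same CPU loop as well. A GPU kernel exists in
    /// `compute_tensor_batch_gpu` but is not yet dispatched from here.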
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

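    /// Convenience entry point for WASM/TypedArray callers: `flat_data` packs each
    /// triple as nine consecutive values `[x0, x1, x2, y0, y1, y2, z0, z1, z2]`,
    /// so its length must equal `batch_size * 9`.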
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        // Placeholder estimate; per-buffer usage tracking is not implemented yet.
        Ok(1024 * 1024)
    }

    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

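    /// Computes a batch of divergences on the CPU: each pair `(p, q)` is scored
    /// as `sum_i p_i * ln(p_i / q_i)` over strictly positive components, which is
    /// the KL divergence when `p` and `q` are normalized distributions.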
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

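    /// GPU implementation of the batched Amari-Chentsov tensor: each triple is
    /// reduced to the scalar triple product x · (y × z) by the WGSL kernel in
    /// `TENSOR_COMPUTE_SHADER`. Currently unused by `amari_chentsov_tensor_batch`,
    /// hence the `#[allow(dead_code)]`.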
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }
}

pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

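    /// Returns the diagonal entries of the stored matrix as an eigenvalue
    /// estimate. This is exact only for diagonal matrices (such as the identity
    /// placeholder returned by `fisher_information_matrix`); a full eigensolver
    /// is not implemented here.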
    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

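    // Added sanity check: the host-side CayleyEntry layout must match the
    // 8-byte (f32 + u32) struct declared in GEOMETRIC_PRODUCT_SHADER, otherwise
    // bytemuck::cast_slice would upload misaligned table entries.
    #[test]
    fn test_cayley_entry_layout_matches_shader() {
        assert_eq!(std::mem::size_of::<CayleyEntry>(), 8);
        assert_eq!(std::mem::align_of::<CayleyEntry>(), 4);
    }
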
    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}