pub mod adaptive;
#[cfg(feature = "automata")]
pub mod automata;
pub mod benchmarks;
#[cfg(feature = "calculus")]
pub mod calculus;
#[cfg(feature = "dual")]
pub mod dual;
#[cfg(feature = "enumerative")]
pub mod enumerative;
#[cfg(feature = "functional")]
pub mod functional;
#[cfg(feature = "holographic")]
pub mod holographic;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
#[cfg(feature = "probabilistic")]
pub mod probabilistic;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "calculus")]
pub use calculus::GpuCalculus;
#[cfg(feature = "functional")]
pub use functional::{
    AdaptiveFunctionalCompute, GpuFunctionalError, GpuFunctionalResult, GpuHilbertSpace,
    GpuMatrixOperator, GpuSpectralDecomposition,
};
#[cfg(feature = "holographic")]
pub use holographic::{
    GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
};
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
#[cfg(feature = "probabilistic")]
pub use probabilistic::{GpuProbabilistic, GpuProbabilisticError, GpuProbabilisticResult};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

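/// Errors that can occur while initializing the GPU or running compute passes.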
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

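/// GPU-accelerated Clifford algebra context holding the wgpu device, queue,
/// the compiled geometric-product pipeline, and the precomputed Cayley table.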
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
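    /// Creates a GPU context for the Clifford algebra Cl(P, Q, R), requesting a
    /// high-performance adapter and uploading the Cayley table to a storage buffer.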
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

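    /// Flattens the Cayley table of Cl(P, Q, R) into a row-major array of
    /// (sign, index) entries suitable for upload to a GPU storage buffer.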
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

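    /// Computes geometric products for a batch of multivector pairs on the GPU.
    ///
    /// `a_batch` and `b_batch` are flat coefficient arrays with `basis_count`
    /// values per multivector; the result has the same layout. Coefficients are
    /// converted to `f32` for the shader and back to `f64` on readback.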
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

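    /// Heuristic for dispatch decisions: GPU execution only pays off once the
    /// batch is large enough to amortize buffer setup and transfer overhead.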
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

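/// A single Cayley-table entry: the sign and target basis-blade index of the
/// product of two basis blades. `#[repr(C)]` matches the WGSL struct layout.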
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

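/// WGSL compute shader for batched geometric products. The hardcoded
/// `BASIS_COUNT` of 8 targets the 8-dimensional multivector space of Cl(3,0,0).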
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

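/// Dispatcher that prefers the GPU for large batches and transparently falls
/// back to CPU computation when no adapter is available or the batch is small.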
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

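    /// Computes a single geometric product. Individual products always run on
    /// the CPU; the GPU path is reserved for batched workloads.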
    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        a.geometric_product(b)
    }

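    /// Batched geometric products for Cl(3,0,0) multivectors stored as flat
    /// arrays of 8 coefficients each. Large batches are dispatched to the GPU
    /// when one is available; otherwise the products are computed on the CPU.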
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // 8 coefficients per multivector in Cl(3,0,0)
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

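/// GPU context for information-geometry operations (Amari-Chentsov tensor,
/// Fisher information, Bregman divergences) with CPU fallbacks for small inputs.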
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
    /// Creates a GPU info-geometry context, trying high-performance, low-power,
    /// and fallback adapters in turn before giving up.
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    /// Creates a context with an explicit adapter preference: "high-performance",
    /// "low-power", or "fallback".
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    /// Evaluates a single Amari-Chentsov tensor value; computed directly on the CPU.
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

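    /// Evaluates the Amari-Chentsov tensor for a batch of multivector triples.
    /// All batches are currently computed on the CPU; the GPU kernel in
    /// `compute_tensor_batch_gpu` is not yet wired into this path.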
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: compute directly on the CPU
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: currently also computed on the CPU
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

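    /// Builds multivector triples from a flat array of 9 values per triple
    /// (the x, y, z vector components in order) and evaluates the tensor batch.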
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

    /// Returns basic information about the underlying device.
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    /// Returns an estimate of GPU memory usage in bytes (currently a fixed
    /// placeholder value).
    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        Ok(1024 * 1024)
    }

    /// Returns a Fisher information matrix (currently a 2x2 identity placeholder).
    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    /// Computes a KL-style divergence for each (p, q) pair, summing
    /// `p_i * ln(p_i / q_i)` over the strictly positive entries.
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

    /// GPU kernel for batched Amari-Chentsov tensor evaluation. Not yet wired
    /// into the public batch API.
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64, // one f32 per result
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // One invocation per triple, 64 invocations per workgroup
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    /// The Fisher pipeline currently reuses the tensor shader as a placeholder.
    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }

    /// The divergence pipeline currently reuses the tensor shader as a placeholder.
    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }
}

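/// Summary of the GPU device backing a `GpuInfoGeometry` context.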
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

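/// Fisher information matrix wrapper returned by `fisher_information_matrix`.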
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    /// Returns the diagonal entries as an eigenvalue approximation; this is
    /// exact for the diagonal placeholder matrices currently produced.
    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

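/// WGSL compute shader evaluating the Amari-Chentsov tensor for vector triples
/// as the scalar triple product x · (y × z), one invocation per triple.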
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

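    // A minimal sketch of the CPU fallback path: with a single pair (batch size
    // 1, well below the GPU threshold), AdaptiveCompute computes the product on
    // the CPU, so the test also runs on machines without a GPU adapter. Assumes
    // the coefficient layout used elsewhere in this file: 8 f64 values per
    // Cl(3,0,0) multivector, with the scalar part at index 0.
    #[tokio::test]
    async fn test_adaptive_compute_cpu_fallback() {
        let compute = AdaptiveCompute::new::<3, 0, 0>().await;

        // Two purely scalar multivectors: 2.0 * 3.0 = 6.0 in the scalar slot.
        let mut a = vec![0.0f64; 8];
        let mut b = vec![0.0f64; 8];
        a[0] = 2.0;
        b[0] = 3.0;

        let result = compute
            .batch_geometric_product(&a, &b)
            .await
            .expect("CPU fallback should not fail");

        assert_eq!(result.len(), 8);
        assert!((result[0] - 6.0).abs() < 1e-12);
    }
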
    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip in CI and headless environments where no GPU adapter is available
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}