pub mod adaptive;
#[cfg(feature = "automata")]
pub mod automata;
pub mod benchmarks;
#[cfg(feature = "calculus")]
pub mod calculus;
#[cfg(feature = "dual")]
pub mod dual;
#[cfg(feature = "enumerative")]
pub mod enumerative;
#[cfg(feature = "functional")]
pub mod functional;
#[cfg(feature = "holographic")]
pub mod holographic;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
#[cfg(feature = "probabilistic")]
pub mod probabilistic;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "calculus")]
pub use calculus::GpuCalculus;
#[cfg(feature = "functional")]
pub use functional::{
    AdaptiveFunctionalCompute, GpuFunctionalError, GpuFunctionalResult, GpuHilbertSpace,
    GpuMatrixOperator, GpuSpectralDecomposition,
};
#[cfg(feature = "holographic")]
pub use holographic::{
    GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
    GpuOpticalField,
};
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
#[cfg(feature = "probabilistic")]
pub use probabilistic::{GpuProbabilistic, GpuProbabilisticError, GpuProbabilisticResult};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

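/// Errors that can arise while initializing a GPU device or executing compute work.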
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

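/// GPU-accelerated Clifford algebra for the signature Cl(P, Q, R), holding a wgpu
/// device and queue, a precomputed Cayley table buffer, and the geometric-product
/// compute pipeline.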
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
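    /// Requests a high-performance adapter and device, uploads the Cayley table,
    /// and builds the geometric-product compute pipeline for Cl(P, Q, R).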
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

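    /// Flattens the CPU-side Cayley table into `(sign, index)` entries in row-major
    /// order so it can be uploaded as a GPU storage buffer.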
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

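    /// Computes geometric products for a batch of multivectors on the GPU.
    ///
    /// Inputs are flat coefficient arrays with `basis_count` entries per
    /// multivector; values are converted to `f32` for the shader and the results
    /// are converted back to `f64`.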
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

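    /// Heuristic: GPU dispatch only pays off above roughly 100 operations;
    /// smaller batches stay on the CPU.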
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

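/// One entry of the flattened Cayley table: the sign and target basis-blade index
/// of a basis-blade product. The `#[repr(C)]` layout matches the WGSL struct below.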
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

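/// WGSL kernel computing one geometric product per invocation using the flattened
/// Cayley table (hard-coded here for the 8-element basis of 3D space).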
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

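/// Adaptive dispatcher that uses the GPU for large batches and falls back to the
/// CPU when no adapter is available or the batch is small.
///
/// Usage sketch (hypothetical call site; adjust to your crate setup):
///
/// ```ignore
/// let compute = AdaptiveCompute::new::<3, 0, 0>().await;
/// let products = compute.batch_geometric_product(&a_flat, &b_flat).await?;
/// ```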
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // A single product is always cheaper on the CPU than a GPU round trip.
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // Assumes 8 coefficients per multivector, i.e. Cl(3,0,0).
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback for small batches or when no GPU is available.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

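/// GPU context for information-geometry primitives (Amari-Chentsov tensor,
/// Fisher information, Bregman divergences), with CPU fallbacks for small inputs.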
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
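    /// Creates a context by trying adapters in order of preference:
    /// high-performance, then low-power, then the fallback (software) adapter.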
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

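    /// Like [`Self::new`], but honors an explicit adapter preference:
    /// `"high-performance"`, `"low-power"`, or `"fallback"`.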
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

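    /// Evaluates the Amari-Chentsov tensor for a single vector triple. Delegates
    /// to the CPU implementation; a single evaluation does not justify a GPU round trip.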
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

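    /// Evaluates the Amari-Chentsov tensor for a batch of vector triples,
    /// choosing between the small-batch and large-batch paths by size.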
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches are not worth a GPU round trip; compute on the CPU.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches currently use the same CPU evaluation; see
        // `compute_tensor_batch_gpu` for the GPU path.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

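    /// Builds multivector batches from a flat `[x0, x1, x2, y0, y1, y2, z0, z1, z2, ...]`
    /// layout (nine values per triple) and forwards to the batch evaluator.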
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        // Nominal placeholder value (1 MiB); no real allocation tracking is performed.
        Ok(1024 * 1024)
    }

    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        // Placeholder: returns a 2x2 identity Fisher matrix.
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    /// Computes per-pair divergences on the CPU as `sum_i p_i * ln(p_i / q_i)`
    /// over strictly positive entries.
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

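    /// GPU path for the batched Amari-Chentsov tensor: uploads the vector triples,
    /// dispatches the tensor compute shader, and reads back one `f32` per triple.
    /// Currently not called by the public batch API.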
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // WGSL `array<vec3<f32>>` has a 16-byte element stride, so pad each
        // 3-component vector to four f32s before upload.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                    0.0,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64, // one f32 per tensor value
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // One invocation per batch entry, 64 invocations per workgroup (see shader).
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor compute pipeline.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: currently reuses the tensor compute pipeline.
        Self::create_tensor_pipeline(device)
    }
}

/// Lightweight description of the adapter backing a [`GpuInfoGeometry`] context.
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

/// Fisher information matrix returned by [`GpuInfoGeometry::fisher_information_matrix`].
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        // For the diagonal placeholder matrix, the diagonal entries are the eigenvalues.
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

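/// WGSL kernel evaluating the Amari-Chentsov tensor for each vector triple as the
/// scalar triple product x · (y × z), one invocation per batch element.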
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip on CI runners and headless machines where no adapter is available.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}