1pub mod adaptive;
110#[cfg(feature = "automata")]
111pub mod automata;
112pub mod benchmarks;
113#[cfg(feature = "calculus")]
114pub mod calculus;
115#[cfg(feature = "dual")]
116pub mod dual;
117#[cfg(feature = "enumerative")]
118pub mod enumerative;
119#[cfg(feature = "functional")]
120pub mod functional;
121#[cfg(feature = "gf2")]
122pub mod gf2;
123#[cfg(feature = "holographic")]
128pub mod holographic;
129#[cfg(feature = "measure")]
130pub mod measure;
131pub mod multi_gpu;
132pub mod network;
133pub mod performance;
134#[cfg(feature = "probabilistic")]
135pub mod probabilistic;
136pub mod relativistic;
137pub mod shaders;
138pub mod timeline;
139#[cfg(feature = "topology")]
144pub mod topology;
145pub mod unified;
146pub mod verification;
147
148pub use adaptive::{
149 AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
150 GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
151 WasmEnvironment,
152};
153use amari_core::Multivector;
154use amari_info_geom::amari_chentsov_tensor;
155pub use benchmarks::{
156 AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
157 BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
158};
159use bytemuck::{Pod, Zeroable};
160#[cfg(feature = "calculus")]
161pub use calculus::GpuCalculus;
162#[cfg(feature = "functional")]
163pub use functional::{
164 AdaptiveFunctionalCompute, GpuFunctionalError, GpuFunctionalResult, GpuHilbertSpace,
165 GpuMatrixOperator, GpuSpectralDecomposition,
166};
167#[cfg(feature = "gf2")]
168pub use gf2::{
169 GF2GpuError, GF2GpuOps, GF2GpuResult, GpuGF2CliffordPair, GpuGF2HammingPair, GpuGF2MatVecData,
170};
171#[cfg(feature = "holographic")]
172pub use holographic::{
173 GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
174 GpuOpticalField,
175};
176#[cfg(feature = "measure")]
177pub use measure::{
178 GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
179 GpuTropicalMeasure,
180};
181pub use multi_gpu::{
182 ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
183 IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
184 PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
185};
186pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
187pub use performance::{
188 AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
189 WorkgroupOptimizer,
190};
191#[cfg(feature = "probabilistic")]
192pub use probabilistic::{GpuProbabilistic, GpuProbabilisticError, GpuProbabilisticResult};
193pub use relativistic::{
194 GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
195};
196pub use shaders::{
197 ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TOPOLOGY_SHADERS, TROPICAL_SHADERS,
198};
199use thiserror::Error;
200pub use timeline::{
201 BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
202 OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
203 PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
204 UtilizationAnalysis,
205};
206#[cfg(feature = "topology")]
207pub use topology::{
208 AdaptiveTopologyCompute, GpuCriticalPoint, GpuTopology, GpuTopologyError, GpuTopologyResult,
209};
210pub use unified::{
211 BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
212 GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
213 UnifiedGpuResult,
214};
215pub use verification::{
216 GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
217 VerificationConfig, VerificationStrategy, VerifiedMultivector,
218};
219use wgpu::util::DeviceExt;
220
/// Errors that can occur during GPU initialization, buffer handling, and
/// shader/pipeline creation.
#[derive(Error, Debug)]
pub enum GpuError {
    /// No suitable adapter/device could be acquired from the WebGPU backend.
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    /// Buffer creation, validation, or result readback failed.
    #[error("GPU buffer error: {0}")]
    BufferError(String),

    /// A WGSL shader failed to compile or link into a pipeline.
    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}
232
/// GPU-accelerated Clifford algebra operations backed by a WebGPU compute
/// pipeline that evaluates batched geometric products.
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    /// Pipeline running the geometric-product WGSL shader.
    compute_pipeline: wgpu::ComputePipeline,
    /// Flattened Cayley (multiplication) table, uploaded once at construction.
    cayley_buffer: wgpu::Buffer,
    /// Algebra dimension P + Q + R.
    #[allow(dead_code)]
    dim: usize,
    /// Number of basis blades, 2^dim.
    basis_count: usize,
}
243
244impl GpuCliffordAlgebra {
245 pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
247 let instance = wgpu::Instance::default();
248
249 let adapter = instance
250 .request_adapter(&wgpu::RequestAdapterOptions {
251 power_preference: wgpu::PowerPreference::HighPerformance,
252 compatible_surface: None,
253 force_fallback_adapter: false,
254 })
255 .await
256 .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;
257
258 let (device, queue) = adapter
259 .request_device(
260 &wgpu::DeviceDescriptor {
261 label: Some("Amari GPU Device"),
262 required_features: wgpu::Features::empty(),
263 required_limits: wgpu::Limits::default(),
264 },
265 None,
266 )
267 .await
268 .map_err(|e| GpuError::InitializationError(e.to_string()))?;
269
270 let dim = P + Q + R;
271 let basis_count = 1 << dim;
272
273 let cayley_table = Self::generate_cayley_table::<P, Q, R>();
275 let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
276 label: Some("Cayley Table"),
277 contents: bytemuck::cast_slice(&cayley_table),
278 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
279 });
280
281 let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
283 label: Some("Geometric Product Shader"),
284 source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
285 });
286
287 let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
288 label: Some("Compute Bind Group Layout"),
289 entries: &[
290 wgpu::BindGroupLayoutEntry {
291 binding: 0,
292 visibility: wgpu::ShaderStages::COMPUTE,
293 ty: wgpu::BindingType::Buffer {
294 ty: wgpu::BufferBindingType::Storage { read_only: true },
295 has_dynamic_offset: false,
296 min_binding_size: None,
297 },
298 count: None,
299 },
300 wgpu::BindGroupLayoutEntry {
301 binding: 1,
302 visibility: wgpu::ShaderStages::COMPUTE,
303 ty: wgpu::BindingType::Buffer {
304 ty: wgpu::BufferBindingType::Storage { read_only: true },
305 has_dynamic_offset: false,
306 min_binding_size: None,
307 },
308 count: None,
309 },
310 wgpu::BindGroupLayoutEntry {
311 binding: 2,
312 visibility: wgpu::ShaderStages::COMPUTE,
313 ty: wgpu::BindingType::Buffer {
314 ty: wgpu::BufferBindingType::Storage { read_only: true },
315 has_dynamic_offset: false,
316 min_binding_size: None,
317 },
318 count: None,
319 },
320 wgpu::BindGroupLayoutEntry {
321 binding: 3,
322 visibility: wgpu::ShaderStages::COMPUTE,
323 ty: wgpu::BindingType::Buffer {
324 ty: wgpu::BufferBindingType::Storage { read_only: false },
325 has_dynamic_offset: false,
326 min_binding_size: None,
327 },
328 count: None,
329 },
330 ],
331 });
332
333 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
334 label: Some("Compute Pipeline Layout"),
335 bind_group_layouts: &[&bind_group_layout],
336 push_constant_ranges: &[],
337 });
338
339 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
340 label: Some("Geometric Product Pipeline"),
341 layout: Some(&pipeline_layout),
342 module: &shader,
343 entry_point: "main",
344 });
345
346 Ok(Self {
347 device,
348 queue,
349 compute_pipeline,
350 cayley_buffer,
351 dim,
352 basis_count,
353 })
354 }
355
356 fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
358 use amari_core::cayley::CayleyTable;
359
360 let table = CayleyTable::<P, Q, R>::get();
361 let basis_count = 1 << (P + Q + R);
362 let mut flat_table = Vec::with_capacity(basis_count * basis_count);
363
364 for i in 0..basis_count {
365 for j in 0..basis_count {
366 let (sign, index) = table.get_product(i, j);
367 flat_table.push(CayleyEntry {
368 sign: sign as f32,
369 index: index as u32,
370 });
371 }
372 }
373
374 flat_table
375 }
376
377 pub async fn batch_geometric_product(
379 &self,
380 a_batch: &[f64],
381 b_batch: &[f64],
382 ) -> Result<Vec<f64>, GpuError> {
383 let batch_size = a_batch.len() / self.basis_count;
384
385 if a_batch.len() != b_batch.len() {
386 return Err(GpuError::BufferError(
387 "Input batches must have same size".to_string(),
388 ));
389 }
390
391 let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
393 let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();
394
395 let a_buffer = self
397 .device
398 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
399 label: Some("A Buffer"),
400 contents: bytemuck::cast_slice(&a_f32),
401 usage: wgpu::BufferUsages::STORAGE,
402 });
403
404 let b_buffer = self
405 .device
406 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
407 label: Some("B Buffer"),
408 contents: bytemuck::cast_slice(&b_f32),
409 usage: wgpu::BufferUsages::STORAGE,
410 });
411
412 let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
413 label: Some("Output Buffer"),
414 size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
415 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
416 mapped_at_creation: false,
417 });
418
419 let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
420 label: Some("Staging Buffer"),
421 size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
422 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
423 mapped_at_creation: false,
424 });
425
426 let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
428 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
429 label: Some("Compute Bind Group"),
430 layout: &bind_group_layout,
431 entries: &[
432 wgpu::BindGroupEntry {
433 binding: 0,
434 resource: self.cayley_buffer.as_entire_binding(),
435 },
436 wgpu::BindGroupEntry {
437 binding: 1,
438 resource: a_buffer.as_entire_binding(),
439 },
440 wgpu::BindGroupEntry {
441 binding: 2,
442 resource: b_buffer.as_entire_binding(),
443 },
444 wgpu::BindGroupEntry {
445 binding: 3,
446 resource: output_buffer.as_entire_binding(),
447 },
448 ],
449 });
450
451 let mut encoder = self
453 .device
454 .create_command_encoder(&wgpu::CommandEncoderDescriptor {
455 label: Some("Compute Encoder"),
456 });
457
458 {
459 let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
460 label: Some("Compute Pass"),
461 timestamp_writes: None,
462 });
463
464 compute_pass.set_pipeline(&self.compute_pipeline);
465 compute_pass.set_bind_group(0, &bind_group, &[]);
466 compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
467 }
468
469 encoder.copy_buffer_to_buffer(
470 &output_buffer,
471 0,
472 &staging_buffer,
473 0,
474 (a_batch.len() * std::mem::size_of::<f32>()) as u64,
475 );
476
477 self.queue.submit(Some(encoder.finish()));
478
479 let buffer_slice = staging_buffer.slice(..);
481 let (sender, receiver) = futures::channel::oneshot::channel();
482 buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
483 sender.send(result).unwrap();
484 });
485
486 self.device.poll(wgpu::Maintain::Wait);
487 receiver
488 .await
489 .unwrap()
490 .map_err(|e| GpuError::BufferError(e.to_string()))?;
491
492 let data = buffer_slice.get_mapped_range();
493 let result_f32: &[f32] = bytemuck::cast_slice(&data);
494 let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();
495
496 drop(data);
497 staging_buffer.unmap();
498
499 Ok(result)
500 }
501
502 pub fn should_use_gpu(operation_count: usize) -> bool {
504 operation_count >= 100
506 }
507}
508
/// One entry of the flattened Cayley table, mirroring the WGSL `CayleyEntry`
/// struct layout (8 bytes: an f32 sign followed by a u32 blade index).
/// `repr(C)` + `Pod`/`Zeroable` make it safe to upload via `bytemuck`.
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    /// Sign factor applied to the product of two basis blades.
    sign: f32,
    /// Index of the resulting basis blade.
    index: u32,
}
516
/// WGSL compute shader performing batched geometric products via the
/// uploaded Cayley table, one multivector product per invocation.
/// `BASIS_COUNT` is written for the 8-blade 3D case; other dimensions
/// require this constant to be specialized before compilation.
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;
568
/// Dispatcher that routes geometric-product work to the GPU when one is
/// available and the batch is large enough, falling back to the CPU otherwise.
pub struct AdaptiveCompute {
    /// GPU backend; `None` when GPU initialization failed at construction.
    gpu: Option<GpuCliffordAlgebra>,
}
573
574impl AdaptiveCompute {
575 pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
577 let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
578 Self { gpu }
579 }
580
581 pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
583 &self,
584 a: &Multivector<P, Q, R>,
585 b: &Multivector<P, Q, R>,
586 ) -> Multivector<P, Q, R> {
587 a.geometric_product(b)
589 }
590
591 pub async fn batch_geometric_product(
593 &self,
594 a_batch: &[f64],
595 b_batch: &[f64],
596 ) -> Result<Vec<f64>, GpuError> {
597 let batch_size = a_batch.len() / 8; if let Some(gpu) = &self.gpu {
600 if GpuCliffordAlgebra::should_use_gpu(batch_size) {
601 return gpu.batch_geometric_product(a_batch, b_batch).await;
602 }
603 }
604
605 let mut result = Vec::with_capacity(a_batch.len());
607 for i in 0..batch_size {
608 let start = i * 8;
609 let end = start + 8;
610
611 let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
612 let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
613 let product = a.geometric_product(&b);
614
615 for j in 0..8 {
616 result.push(product.get(j));
617 }
618 }
619
620 Ok(result)
621 }
622}
623
/// GPU-accelerated information-geometry operations: Amari-Chentsov tensor
/// evaluation, Fisher information, and Bregman divergences.
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    /// Pipeline computing scalar triple products for tensor batches.
    tensor_pipeline: wgpu::ComputePipeline,
    /// Currently built from the same shader as `tensor_pipeline` (see
    /// `create_fisher_pipeline`); dedicated kernel pending.
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    /// Currently built from the same shader as `tensor_pipeline` (see
    /// `create_divergence_pipeline`); dedicated kernel pending.
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}
643
644impl GpuInfoGeometry {
645 pub async fn new() -> Result<Self, GpuError> {
647 let instance = wgpu::Instance::default();
648
649 let adapter = if let Some(adapter) = instance
651 .request_adapter(&wgpu::RequestAdapterOptions {
652 power_preference: wgpu::PowerPreference::HighPerformance,
653 compatible_surface: None,
654 force_fallback_adapter: false,
655 })
656 .await
657 {
658 adapter
659 } else if let Some(adapter) = instance
660 .request_adapter(&wgpu::RequestAdapterOptions {
661 power_preference: wgpu::PowerPreference::LowPower,
662 compatible_surface: None,
663 force_fallback_adapter: false,
664 })
665 .await
666 {
667 adapter
668 } else if let Some(adapter) = instance
669 .request_adapter(&wgpu::RequestAdapterOptions {
670 power_preference: wgpu::PowerPreference::None,
671 compatible_surface: None,
672 force_fallback_adapter: true,
673 })
674 .await
675 {
676 adapter
677 } else {
678 return Err(GpuError::InitializationError(
679 "No GPU adapter found".to_string(),
680 ));
681 };
682
683 let (device, queue) = adapter
684 .request_device(
685 &wgpu::DeviceDescriptor {
686 label: Some("Amari GPU Info Geometry Device"),
687 required_features: wgpu::Features::empty(),
688 required_limits: wgpu::Limits::default(),
689 },
690 None,
691 )
692 .await
693 .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;
694
695 let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
697 let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
698 let divergence_pipeline = Self::create_divergence_pipeline(&device)?;
699
700 Ok(Self {
701 device,
702 queue,
703 tensor_pipeline,
704 fisher_pipeline,
705 divergence_pipeline,
706 })
707 }
708
709 pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
711 let (power_preference, force_fallback) = match device_type {
712 "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
713 "low-power" => (wgpu::PowerPreference::LowPower, false),
714 "fallback" => (wgpu::PowerPreference::None, true),
715 _ => {
716 return Err(GpuError::InitializationError(
717 "Invalid device type".to_string(),
718 ))
719 }
720 };
721
722 let instance = wgpu::Instance::default();
723
724 let adapter = instance
725 .request_adapter(&wgpu::RequestAdapterOptions {
726 power_preference,
727 compatible_surface: None,
728 force_fallback_adapter: force_fallback,
729 })
730 .await
731 .ok_or_else(|| {
732 GpuError::InitializationError("No suitable adapter found".to_string())
733 })?;
734
735 let (device, queue) = adapter
736 .request_device(
737 &wgpu::DeviceDescriptor {
738 label: Some("Amari GPU Info Geometry Device"),
739 required_features: wgpu::Features::empty(),
740 required_limits: wgpu::Limits::default(),
741 },
742 None,
743 )
744 .await
745 .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;
746
747 let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
748 let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
749 let divergence_pipeline = Self::create_divergence_pipeline(&device)?;
750
751 Ok(Self {
752 device,
753 queue,
754 tensor_pipeline,
755 fisher_pipeline,
756 divergence_pipeline,
757 })
758 }
759
760 pub async fn amari_chentsov_tensor(
762 &self,
763 x: &Multivector<3, 0, 0>,
764 y: &Multivector<3, 0, 0>,
765 z: &Multivector<3, 0, 0>,
766 ) -> Result<f64, GpuError> {
767 Ok(amari_chentsov_tensor(x, y, z))
769 }
770
771 pub async fn amari_chentsov_tensor_batch(
782 &self,
783 x_batch: &[Multivector<3, 0, 0>],
784 y_batch: &[Multivector<3, 0, 0>],
785 z_batch: &[Multivector<3, 0, 0>],
786 ) -> Result<Vec<f64>, GpuError> {
787 let batch_size = x_batch.len();
788 if batch_size == 0 {
789 return Ok(Vec::new());
790 }
791
792 if batch_size < 100 {
794 let results = x_batch
795 .iter()
796 .zip(y_batch.iter())
797 .zip(z_batch.iter())
798 .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
799 .collect();
800 return Ok(results);
801 }
802
803 let results = x_batch
807 .iter()
808 .zip(y_batch.iter())
809 .zip(z_batch.iter())
810 .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
811 .collect();
812 Ok(results)
813 }
814
815 pub async fn amari_chentsov_tensor_from_typed_arrays(
817 &self,
818 flat_data: &[f64],
819 batch_size: usize,
820 ) -> Result<Vec<f64>, GpuError> {
821 if flat_data.len() != batch_size * 9 {
822 return Err(GpuError::BufferError("Invalid flat data size".to_string()));
823 }
824
825 let mut x_batch = Vec::with_capacity(batch_size);
827 let mut y_batch = Vec::with_capacity(batch_size);
828 let mut z_batch = Vec::with_capacity(batch_size);
829
830 for i in 0..batch_size {
831 let base = i * 9;
832 let mut x = Multivector::zero();
833 let mut y = Multivector::zero();
834 let mut z = Multivector::zero();
835
836 x.set_vector_component(0, flat_data[base]);
838 x.set_vector_component(1, flat_data[base + 1]);
839 x.set_vector_component(2, flat_data[base + 2]);
840
841 y.set_vector_component(0, flat_data[base + 3]);
842 y.set_vector_component(1, flat_data[base + 4]);
843 y.set_vector_component(2, flat_data[base + 5]);
844
845 z.set_vector_component(0, flat_data[base + 6]);
846 z.set_vector_component(1, flat_data[base + 7]);
847 z.set_vector_component(2, flat_data[base + 8]);
848
849 x_batch.push(x);
850 y_batch.push(y);
851 z_batch.push(z);
852 }
853
854 self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
855 .await
856 }
857
858 pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
860 Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
861 }
862
863 pub async fn memory_usage(&self) -> Result<u64, GpuError> {
865 Ok(1024 * 1024) }
868
869 pub async fn fisher_information_matrix(
871 &self,
872 _parameters: &[f64],
873 ) -> Result<GpuFisherMatrix, GpuError> {
874 Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
876 }
877
878 pub async fn bregman_divergence_batch(
880 &self,
881 p_batch: &[Vec<f64>],
882 q_batch: &[Vec<f64>],
883 ) -> Result<Vec<f64>, GpuError> {
884 let results = p_batch
886 .iter()
887 .zip(q_batch.iter())
888 .map(|(p, q)| {
889 p.iter()
891 .zip(q.iter())
892 .map(|(pi, qi)| {
893 if *pi > 0.0 && *qi > 0.0 {
894 pi * (pi / qi).ln()
895 } else {
896 0.0
897 }
898 })
899 .sum()
900 })
901 .collect();
902 Ok(results)
903 }
904
905 #[allow(dead_code)] async fn compute_tensor_batch_gpu(
917 &self,
918 x_batch: &[Multivector<3, 0, 0>],
919 y_batch: &[Multivector<3, 0, 0>],
920 z_batch: &[Multivector<3, 0, 0>],
921 ) -> Result<Vec<f64>, GpuError> {
922 let batch_size = x_batch.len();
923
924 let x_data: Vec<f32> = x_batch
926 .iter()
927 .flat_map(|mv| {
928 vec![
929 mv.vector_component(0) as f32,
930 mv.vector_component(1) as f32,
931 mv.vector_component(2) as f32,
932 ]
933 })
934 .collect();
935
936 let y_data: Vec<f32> = y_batch
937 .iter()
938 .flat_map(|mv| {
939 vec![
940 mv.vector_component(0) as f32,
941 mv.vector_component(1) as f32,
942 mv.vector_component(2) as f32,
943 ]
944 })
945 .collect();
946
947 let z_data: Vec<f32> = z_batch
948 .iter()
949 .flat_map(|mv| {
950 vec![
951 mv.vector_component(0) as f32,
952 mv.vector_component(1) as f32,
953 mv.vector_component(2) as f32,
954 ]
955 })
956 .collect();
957
958 let x_buffer = self
960 .device
961 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
962 label: Some("X Batch Buffer"),
963 contents: bytemuck::cast_slice(&x_data),
964 usage: wgpu::BufferUsages::STORAGE,
965 });
966
967 let y_buffer = self
968 .device
969 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
970 label: Some("Y Batch Buffer"),
971 contents: bytemuck::cast_slice(&y_data),
972 usage: wgpu::BufferUsages::STORAGE,
973 });
974
975 let z_buffer = self
976 .device
977 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
978 label: Some("Z Batch Buffer"),
979 contents: bytemuck::cast_slice(&z_data),
980 usage: wgpu::BufferUsages::STORAGE,
981 });
982
983 let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
984 label: Some("Output Buffer"),
985 size: (batch_size * 4) as u64, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
987 mapped_at_creation: false,
988 });
989
990 let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
991 label: Some("Staging Buffer"),
992 size: (batch_size * 4) as u64,
993 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
994 mapped_at_creation: false,
995 });
996
997 let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
999 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1000 label: Some("Tensor Compute Bind Group"),
1001 layout: &bind_group_layout,
1002 entries: &[
1003 wgpu::BindGroupEntry {
1004 binding: 0,
1005 resource: x_buffer.as_entire_binding(),
1006 },
1007 wgpu::BindGroupEntry {
1008 binding: 1,
1009 resource: y_buffer.as_entire_binding(),
1010 },
1011 wgpu::BindGroupEntry {
1012 binding: 2,
1013 resource: z_buffer.as_entire_binding(),
1014 },
1015 wgpu::BindGroupEntry {
1016 binding: 3,
1017 resource: output_buffer.as_entire_binding(),
1018 },
1019 ],
1020 });
1021
1022 let mut encoder = self
1024 .device
1025 .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1026 label: Some("Tensor Compute Encoder"),
1027 });
1028
1029 {
1030 let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1031 label: Some("Tensor Compute Pass"),
1032 timestamp_writes: None,
1033 });
1034 compute_pass.set_pipeline(&self.tensor_pipeline);
1035 compute_pass.set_bind_group(0, &bind_group, &[]);
1036 let workgroup_count = batch_size.div_ceil(64); compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
1038 }
1039
1040 encoder.copy_buffer_to_buffer(
1041 &output_buffer,
1042 0,
1043 &staging_buffer,
1044 0,
1045 (batch_size * 4) as u64,
1046 );
1047
1048 self.queue.submit(std::iter::once(encoder.finish()));
1049
1050 let buffer_slice = staging_buffer.slice(..);
1052 let (sender, receiver) = futures::channel::oneshot::channel();
1053 buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
1054 let _ = sender.send(result);
1055 });
1056
1057 self.device.poll(wgpu::Maintain::Wait);
1058
1059 receiver
1060 .await
1061 .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
1062 .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;
1063
1064 let data = buffer_slice.get_mapped_range();
1065 let result_f32: &[f32] = bytemuck::cast_slice(&data);
1066 let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();
1067
1068 drop(data);
1069 staging_buffer.unmap();
1070
1071 Ok(results)
1072 }
1073
1074 fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
1075 let shader_source = TENSOR_COMPUTE_SHADER;
1076 let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
1077 label: Some("Tensor Compute Shader"),
1078 source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
1079 });
1080
1081 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1082 label: Some("Tensor Compute Pipeline"),
1083 layout: None,
1084 module: &shader,
1085 entry_point: "main",
1086 });
1087
1088 Ok(compute_pipeline)
1089 }
1090
1091 fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
1092 Self::create_tensor_pipeline(device)
1094 }
1095
1096 fn create_divergence_pipeline(
1097 device: &wgpu::Device,
1098 ) -> Result<wgpu::ComputePipeline, GpuError> {
1099 Self::create_tensor_pipeline(device)
1101 }
1102}
1103
/// Summary of the compute device backing a GPU context.
pub struct GpuDeviceInfo {
    /// True when backed by an actual GPU (as opposed to a CPU fallback).
    is_gpu: bool,
    /// Human-readable device label; kept for diagnostics.
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    /// Builds a device summary from a GPU flag and a human-readable label.
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_owned(),
        }
    }

    /// Whether the device is an actual GPU.
    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    /// WebGPU support mirrors the GPU flag.
    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu()
    }

    /// A constructed info object is always considered initialized.
    pub fn is_initialized(&self) -> bool {
        true
    }
}
1131
1132pub struct GpuFisherMatrix {
1134 matrix: Vec<Vec<f64>>,
1135}
1136
1137impl GpuFisherMatrix {
1138 fn new(matrix: Vec<Vec<f64>>) -> Self {
1139 Self { matrix }
1140 }
1141
1142 pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
1143 let mut eigenvals = Vec::new();
1145 for i in 0..self.matrix.len() {
1146 if i < self.matrix[i].len() {
1147 eigenvals.push(self.matrix[i][i]);
1148 }
1149 }
1150 Ok(eigenvals)
1151 }
1152}
1153
/// WGSL compute shader evaluating the Amari-Chentsov tensor for batches of
/// vec3 triples as the scalar triple product x · (y × z), writing one f32
/// result per invocation.
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;
1186
1187#[cfg(test)]
1188mod tests {
1189 use super::*;
1190
1191 #[test]
1192 fn test_should_use_gpu() {
1193 assert!(!GpuCliffordAlgebra::should_use_gpu(10));
1194 assert!(GpuCliffordAlgebra::should_use_gpu(1000));
1195 }
1196
1197 #[test]
1198 fn test_should_use_gpu_threshold() {
1199 assert!(!GpuCliffordAlgebra::should_use_gpu(99));
1201 assert!(GpuCliffordAlgebra::should_use_gpu(100));
1202 assert!(GpuCliffordAlgebra::should_use_gpu(101));
1203 }
1204
1205 #[test]
1206 fn test_should_use_gpu_zero_batch() {
1207 assert!(!GpuCliffordAlgebra::should_use_gpu(0));
1208 }
1209
1210 #[test]
1211 fn test_should_use_gpu_single_element() {
1212 assert!(!GpuCliffordAlgebra::should_use_gpu(1));
1213 }
1214
1215 #[test]
1216 fn test_gpu_error_display() {
1217 let init_err = GpuError::InitializationError("No GPU found".to_string());
1218 assert!(init_err.to_string().contains("Failed to initialize GPU"));
1219
1220 let buffer_err = GpuError::BufferError("Buffer too small".to_string());
1221 assert!(buffer_err.to_string().contains("buffer error"));
1222
1223 let shader_err = GpuError::ShaderError("Invalid syntax".to_string());
1224 assert!(shader_err.to_string().contains("Shader compilation"));
1225 }
1226
1227 #[test]
1228 fn test_gpu_error_debug() {
1229 let err = GpuError::InitializationError("test".to_string());
1230 let debug_str = format!("{:?}", err);
1231 assert!(debug_str.contains("InitializationError"));
1232 }
1233
1234 #[test]
1235 fn test_gpu_error_variants() {
1236 let errors = vec![
1238 GpuError::InitializationError("init failed".to_string()),
1239 GpuError::BufferError("buffer failed".to_string()),
1240 GpuError::ShaderError("shader failed".to_string()),
1241 ];
1242
1243 for err in errors {
1244 let _ = err.to_string();
1246 let _ = format!("{:?}", err);
1248 }
1249 }
1250
1251 #[tokio::test]
1252 #[ignore = "GPU hardware required, may fail in CI/CD environments"]
1253 async fn test_adaptive_compute_geometric_product() {
1254 let adaptive = AdaptiveCompute::new::<3, 0, 0>().await;
1256
1257 let e1 = Multivector::<3, 0, 0>::basis_vector(0);
1258 let e2 = Multivector::<3, 0, 0>::basis_vector(1);
1259
1260 let result = adaptive.geometric_product(&e1, &e2).await;
1262 assert!(result.magnitude() > 0.0);
1264 }
1265
1266 #[tokio::test]
1267 #[ignore = "GPU hardware required, may fail in CI/CD environments"]
1268 async fn test_adaptive_compute_batch_small() {
1269 let adaptive = AdaptiveCompute::new::<3, 0, 0>().await;
1272
1273 let e1_coeffs = vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let e2_coeffs = vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let mut a_batch = Vec::new();
1279 let mut b_batch = Vec::new();
1280 a_batch.extend_from_slice(&e1_coeffs);
1281 a_batch.extend_from_slice(&e1_coeffs);
1282 b_batch.extend_from_slice(&e2_coeffs);
1283 b_batch.extend_from_slice(&e2_coeffs);
1284
1285 let result = adaptive.batch_geometric_product(&a_batch, &b_batch).await;
1286 assert!(result.is_ok());
1287 assert_eq!(result.unwrap().len(), 16);
1289 }
1290
1291 #[test]
1292 fn test_gpu_device_info_methods() {
1293 let info = GpuDeviceInfo::new(true, "Test GPU");
1294
1295 assert!(info.is_gpu());
1296 assert!(info.supports_webgpu());
1298 assert!(info.is_initialized());
1299 }
1300
1301 #[test]
1302 fn test_gpu_device_info_cpu() {
1303 let info = GpuDeviceInfo::new(false, "CPU Fallback");
1304
1305 assert!(!info.is_gpu());
1306 assert!(!info.supports_webgpu());
1307 assert!(info.is_initialized());
1308 }
1309
1310 #[tokio::test]
1311 async fn test_gpu_info_geometry_creation() {
1312 if std::env::var("CI").is_ok()
1314 || std::env::var("GITHUB_ACTIONS").is_ok()
1315 || std::env::var("DISPLAY").is_err()
1316 {
1317 println!("Skipping GPU test in CI environment");
1318 return;
1319 }
1320
1321 match GpuInfoGeometry::new().await {
1323 Ok(_) => {
1324 println!("GPU initialization successful");
1326 }
1327 Err(GpuError::InitializationError(_)) => {
1328 println!("GPU initialization failed - no GPU available");
1330 }
1331 Err(e) => panic!("Unexpected error: {:?}", e),
1332 }
1333 }
1334
1335 #[tokio::test]
1336 async fn test_gpu_fisher_matrix_eigenvalues() {
1337 let matrix = GpuFisherMatrix::new(vec![
1338 vec![1.0, 0.0, 0.0],
1339 vec![0.0, 2.0, 0.0],
1340 vec![0.0, 0.0, 3.0],
1341 ]);
1342 let eigenvalues = matrix.eigenvalues().await.unwrap();
1343 assert_eq!(eigenvalues.len(), 3);
1344 assert_eq!(eigenvalues[0], 1.0);
1345 assert_eq!(eigenvalues[1], 2.0);
1346 assert_eq!(eigenvalues[2], 3.0);
1347 }
1348
1349 #[tokio::test]
1350 async fn test_gpu_fisher_matrix_empty() {
1351 let matrix = GpuFisherMatrix::new(vec![]);
1352 let eigenvalues = matrix.eigenvalues().await.unwrap();
1353 assert!(eigenvalues.is_empty());
1354 }
1355
1356 #[tokio::test]
1357 async fn test_gpu_fisher_matrix_single_element() {
1358 let matrix = GpuFisherMatrix::new(vec![vec![5.0]]);
1359 let eigenvalues = matrix.eigenvalues().await.unwrap();
1360 assert_eq!(eigenvalues.len(), 1);
1361 assert_eq!(eigenvalues[0], 5.0);
1362 }
1363
1364 #[test]
1365 fn test_cayley_entry_struct() {
1366 let entry = CayleyEntry {
1368 sign: 1.0,
1369 index: 5,
1370 };
1371 assert_eq!(entry.sign, 1.0);
1372 assert_eq!(entry.index, 5);
1373
1374 let entries = vec![
1376 entry,
1377 CayleyEntry {
1378 sign: -1.0,
1379 index: 3,
1380 },
1381 ];
1382 let bytes: &[u8] = bytemuck::cast_slice(&entries);
1383 assert_eq!(bytes.len(), 16); }
1385
1386 #[test]
1387 fn test_cayley_entry_zero() {
1388 let zeroed: CayleyEntry = bytemuck::Zeroable::zeroed();
1390 assert_eq!(zeroed.sign, 0.0);
1391 assert_eq!(zeroed.index, 0);
1392 }
1393
1394 #[test]
1395 fn test_geometric_product_shader_not_empty() {
1396 assert!(!GEOMETRIC_PRODUCT_SHADER.is_empty());
1398 assert!(GEOMETRIC_PRODUCT_SHADER.contains("@compute"));
1399 assert!(GEOMETRIC_PRODUCT_SHADER.contains("@workgroup_size"));
1400 assert!(GEOMETRIC_PRODUCT_SHADER.contains("CayleyEntry"));
1401 }
1402
1403 #[test]
1404 fn test_tensor_compute_shader_not_empty() {
1405 assert!(!TENSOR_COMPUTE_SHADER.is_empty());
1407 assert!(TENSOR_COMPUTE_SHADER.contains("@compute"));
1408 assert!(TENSOR_COMPUTE_SHADER.contains("@workgroup_size"));
1409 assert!(TENSOR_COMPUTE_SHADER.contains("cross"));
1410 assert!(TENSOR_COMPUTE_SHADER.contains("dot"));
1411 }
1412
1413 #[test]
1414 fn test_gpu_error_from_string() {
1415 let msg = "Custom error message";
1417 let err = GpuError::InitializationError(msg.to_string());
1418 assert!(err.to_string().contains(msg));
1419
1420 let err = GpuError::BufferError(msg.to_string());
1421 assert!(err.to_string().contains(msg));
1422
1423 let err = GpuError::ShaderError(msg.to_string());
1424 assert!(err.to_string().contains(msg));
1425 }
1426
1427 #[test]
1428 fn test_should_use_gpu_large_batch() {
1429 assert!(GpuCliffordAlgebra::should_use_gpu(1000));
1431 assert!(GpuCliffordAlgebra::should_use_gpu(10000));
1432 assert!(GpuCliffordAlgebra::should_use_gpu(100000));
1433 }
1434
1435 #[test]
1436 fn test_should_use_gpu_near_threshold() {
1437 for i in 0..100 {
1439 assert!(!GpuCliffordAlgebra::should_use_gpu(i));
1440 }
1441 for i in 100..200 {
1442 assert!(GpuCliffordAlgebra::should_use_gpu(i));
1443 }
1444 }
1445
1446 #[test]
1447 fn test_gpu_device_info_initialized_always_true() {
1448 let gpu_info = GpuDeviceInfo::new(true, "GPU");
1450 let cpu_info = GpuDeviceInfo::new(false, "CPU");
1451 assert!(gpu_info.is_initialized());
1452 assert!(cpu_info.is_initialized());
1453 }
1454
1455 #[tokio::test]
1456 async fn test_adaptive_compute_cpu_fallback_small_batch() {
1457 let adaptive = AdaptiveCompute { gpu: None };
1459
1460 let e1_coeffs = vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1461 let e2_coeffs = vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1462
1463 let result = adaptive
1464 .batch_geometric_product(&e1_coeffs, &e2_coeffs)
1465 .await;
1466 assert!(result.is_ok());
1467 let coeffs = result.unwrap();
1468 assert_eq!(coeffs.len(), 8);
1469 let has_nonzero = coeffs.iter().any(|&c| c.abs() > 0.5);
1471 assert!(has_nonzero, "Product should have non-zero coefficients");
1472 }
1473
1474 #[tokio::test]
1475 async fn test_adaptive_compute_cpu_fallback_multiple_elements() {
1476 let adaptive = AdaptiveCompute { gpu: None };
1478
1479 let mut a_batch = Vec::new();
1480 let mut b_batch = Vec::new();
1481
1482 for _ in 0..5 {
1484 a_batch.extend_from_slice(&[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1485 b_batch.extend_from_slice(&[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1486 }
1487
1488 let result = adaptive.batch_geometric_product(&a_batch, &b_batch).await;
1489 assert!(result.is_ok());
1490 assert_eq!(result.unwrap().len(), 40); }
1492
1493 #[tokio::test]
1494 async fn test_adaptive_compute_geometric_product_cpu() {
1495 let adaptive = AdaptiveCompute { gpu: None };
1497
1498 let a = Multivector::<3, 0, 0>::scalar(2.0);
1499 let b = Multivector::<3, 0, 0>::scalar(3.0);
1500
1501 let result = adaptive.geometric_product(&a, &b).await;
1502 assert!((result.scalar_part() - 6.0).abs() < 1e-10);
1503 }
1504
1505 #[tokio::test]
1506 async fn test_adaptive_compute_geometric_product_basis_vectors() {
1507 let adaptive = AdaptiveCompute { gpu: None };
1509
1510 let e1 = Multivector::<3, 0, 0>::basis_vector(0);
1511 let e1_clone = Multivector::<3, 0, 0>::basis_vector(0);
1512
1513 let result = adaptive.geometric_product(&e1, &e1_clone).await;
1515 assert!((result.scalar_part() - 1.0).abs() < 1e-10);
1516 }
1517}