1pub mod adaptive;
108#[cfg(feature = "automata")]
109pub mod automata;
110pub mod benchmarks;
111#[cfg(feature = "calculus")]
112pub mod calculus;
113#[cfg(feature = "dual")]
114pub mod dual;
115#[cfg(feature = "enumerative")]
116pub mod enumerative;
117#[cfg(feature = "functional")]
118pub mod functional;
119#[cfg(feature = "holographic")]
124pub mod holographic;
125#[cfg(feature = "measure")]
126pub mod measure;
127pub mod multi_gpu;
128pub mod network;
129pub mod performance;
130#[cfg(feature = "probabilistic")]
131pub mod probabilistic;
132pub mod relativistic;
133pub mod shaders;
134pub mod timeline;
135#[cfg(feature = "topology")]
140pub mod topology;
141pub mod unified;
142pub mod verification;
143
144pub use adaptive::{
145 AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
146 GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
147 WasmEnvironment,
148};
149use amari_core::Multivector;
150use amari_info_geom::amari_chentsov_tensor;
151pub use benchmarks::{
152 AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
153 BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
154};
155use bytemuck::{Pod, Zeroable};
156#[cfg(feature = "calculus")]
157pub use calculus::GpuCalculus;
158#[cfg(feature = "functional")]
159pub use functional::{
160 AdaptiveFunctionalCompute, GpuFunctionalError, GpuFunctionalResult, GpuHilbertSpace,
161 GpuMatrixOperator, GpuSpectralDecomposition,
162};
163#[cfg(feature = "holographic")]
164pub use holographic::{
165 GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
166 GpuOpticalField,
167};
168#[cfg(feature = "measure")]
169pub use measure::{
170 GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
171 GpuTropicalMeasure,
172};
173pub use multi_gpu::{
174 ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
175 IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
176 PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
177};
178pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
179pub use performance::{
180 AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
181 WorkgroupOptimizer,
182};
183#[cfg(feature = "probabilistic")]
184pub use probabilistic::{GpuProbabilistic, GpuProbabilisticError, GpuProbabilisticResult};
185pub use relativistic::{
186 GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
187};
188pub use shaders::{
189 ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TOPOLOGY_SHADERS, TROPICAL_SHADERS,
190};
191use thiserror::Error;
192pub use timeline::{
193 BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
194 OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
195 PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
196 UtilizationAnalysis,
197};
198#[cfg(feature = "topology")]
199pub use topology::{
200 AdaptiveTopologyCompute, GpuCriticalPoint, GpuTopology, GpuTopologyError, GpuTopologyResult,
201};
202pub use unified::{
203 BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
204 GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
205 UnifiedGpuResult,
206};
207pub use verification::{
208 GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
209 VerificationConfig, VerificationStrategy, VerifiedMultivector,
210};
211use wgpu::util::DeviceExt;
212
213#[derive(Error, Debug)]
214pub enum GpuError {
215 #[error("Failed to initialize GPU: {0}")]
216 InitializationError(String),
217
218 #[error("GPU buffer error: {0}")]
219 BufferError(String),
220
221 #[error("Shader compilation error: {0}")]
222 ShaderError(String),
223}
224
225pub struct GpuCliffordAlgebra {
227 device: wgpu::Device,
228 queue: wgpu::Queue,
229 compute_pipeline: wgpu::ComputePipeline,
230 cayley_buffer: wgpu::Buffer,
231 #[allow(dead_code)]
232 dim: usize,
233 basis_count: usize,
234}
235
236impl GpuCliffordAlgebra {
237 pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
239 let instance = wgpu::Instance::default();
240
241 let adapter = instance
242 .request_adapter(&wgpu::RequestAdapterOptions {
243 power_preference: wgpu::PowerPreference::HighPerformance,
244 compatible_surface: None,
245 force_fallback_adapter: false,
246 })
247 .await
248 .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;
249
250 let (device, queue) = adapter
251 .request_device(
252 &wgpu::DeviceDescriptor {
253 label: Some("Amari GPU Device"),
254 required_features: wgpu::Features::empty(),
255 required_limits: wgpu::Limits::default(),
256 },
257 None,
258 )
259 .await
260 .map_err(|e| GpuError::InitializationError(e.to_string()))?;
261
262 let dim = P + Q + R;
263 let basis_count = 1 << dim;
264
265 let cayley_table = Self::generate_cayley_table::<P, Q, R>();
267 let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
268 label: Some("Cayley Table"),
269 contents: bytemuck::cast_slice(&cayley_table),
270 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
271 });
272
273 let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
275 label: Some("Geometric Product Shader"),
276 source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
277 });
278
279 let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
280 label: Some("Compute Bind Group Layout"),
281 entries: &[
282 wgpu::BindGroupLayoutEntry {
283 binding: 0,
284 visibility: wgpu::ShaderStages::COMPUTE,
285 ty: wgpu::BindingType::Buffer {
286 ty: wgpu::BufferBindingType::Storage { read_only: true },
287 has_dynamic_offset: false,
288 min_binding_size: None,
289 },
290 count: None,
291 },
292 wgpu::BindGroupLayoutEntry {
293 binding: 1,
294 visibility: wgpu::ShaderStages::COMPUTE,
295 ty: wgpu::BindingType::Buffer {
296 ty: wgpu::BufferBindingType::Storage { read_only: true },
297 has_dynamic_offset: false,
298 min_binding_size: None,
299 },
300 count: None,
301 },
302 wgpu::BindGroupLayoutEntry {
303 binding: 2,
304 visibility: wgpu::ShaderStages::COMPUTE,
305 ty: wgpu::BindingType::Buffer {
306 ty: wgpu::BufferBindingType::Storage { read_only: true },
307 has_dynamic_offset: false,
308 min_binding_size: None,
309 },
310 count: None,
311 },
312 wgpu::BindGroupLayoutEntry {
313 binding: 3,
314 visibility: wgpu::ShaderStages::COMPUTE,
315 ty: wgpu::BindingType::Buffer {
316 ty: wgpu::BufferBindingType::Storage { read_only: false },
317 has_dynamic_offset: false,
318 min_binding_size: None,
319 },
320 count: None,
321 },
322 ],
323 });
324
325 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
326 label: Some("Compute Pipeline Layout"),
327 bind_group_layouts: &[&bind_group_layout],
328 push_constant_ranges: &[],
329 });
330
331 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
332 label: Some("Geometric Product Pipeline"),
333 layout: Some(&pipeline_layout),
334 module: &shader,
335 entry_point: "main",
336 });
337
338 Ok(Self {
339 device,
340 queue,
341 compute_pipeline,
342 cayley_buffer,
343 dim,
344 basis_count,
345 })
346 }
347
348 fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
350 use amari_core::cayley::CayleyTable;
351
352 let table = CayleyTable::<P, Q, R>::get();
353 let basis_count = 1 << (P + Q + R);
354 let mut flat_table = Vec::with_capacity(basis_count * basis_count);
355
356 for i in 0..basis_count {
357 for j in 0..basis_count {
358 let (sign, index) = table.get_product(i, j);
359 flat_table.push(CayleyEntry {
360 sign: sign as f32,
361 index: index as u32,
362 });
363 }
364 }
365
366 flat_table
367 }
368
369 pub async fn batch_geometric_product(
371 &self,
372 a_batch: &[f64],
373 b_batch: &[f64],
374 ) -> Result<Vec<f64>, GpuError> {
375 let batch_size = a_batch.len() / self.basis_count;
376
377 if a_batch.len() != b_batch.len() {
378 return Err(GpuError::BufferError(
379 "Input batches must have same size".to_string(),
380 ));
381 }
382
383 let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
385 let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();
386
387 let a_buffer = self
389 .device
390 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
391 label: Some("A Buffer"),
392 contents: bytemuck::cast_slice(&a_f32),
393 usage: wgpu::BufferUsages::STORAGE,
394 });
395
396 let b_buffer = self
397 .device
398 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
399 label: Some("B Buffer"),
400 contents: bytemuck::cast_slice(&b_f32),
401 usage: wgpu::BufferUsages::STORAGE,
402 });
403
404 let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
405 label: Some("Output Buffer"),
406 size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
407 usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
408 mapped_at_creation: false,
409 });
410
411 let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
412 label: Some("Staging Buffer"),
413 size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
414 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
415 mapped_at_creation: false,
416 });
417
418 let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
420 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
421 label: Some("Compute Bind Group"),
422 layout: &bind_group_layout,
423 entries: &[
424 wgpu::BindGroupEntry {
425 binding: 0,
426 resource: self.cayley_buffer.as_entire_binding(),
427 },
428 wgpu::BindGroupEntry {
429 binding: 1,
430 resource: a_buffer.as_entire_binding(),
431 },
432 wgpu::BindGroupEntry {
433 binding: 2,
434 resource: b_buffer.as_entire_binding(),
435 },
436 wgpu::BindGroupEntry {
437 binding: 3,
438 resource: output_buffer.as_entire_binding(),
439 },
440 ],
441 });
442
443 let mut encoder = self
445 .device
446 .create_command_encoder(&wgpu::CommandEncoderDescriptor {
447 label: Some("Compute Encoder"),
448 });
449
450 {
451 let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
452 label: Some("Compute Pass"),
453 timestamp_writes: None,
454 });
455
456 compute_pass.set_pipeline(&self.compute_pipeline);
457 compute_pass.set_bind_group(0, &bind_group, &[]);
458 compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
459 }
460
461 encoder.copy_buffer_to_buffer(
462 &output_buffer,
463 0,
464 &staging_buffer,
465 0,
466 (a_batch.len() * std::mem::size_of::<f32>()) as u64,
467 );
468
469 self.queue.submit(Some(encoder.finish()));
470
471 let buffer_slice = staging_buffer.slice(..);
473 let (sender, receiver) = futures::channel::oneshot::channel();
474 buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
475 sender.send(result).unwrap();
476 });
477
478 self.device.poll(wgpu::Maintain::Wait);
479 receiver
480 .await
481 .unwrap()
482 .map_err(|e| GpuError::BufferError(e.to_string()))?;
483
484 let data = buffer_slice.get_mapped_range();
485 let result_f32: &[f32] = bytemuck::cast_slice(&data);
486 let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();
487
488 drop(data);
489 staging_buffer.unmap();
490
491 Ok(result)
492 }
493
494 pub fn should_use_gpu(operation_count: usize) -> bool {
496 operation_count >= 100
498 }
499}
500
501#[repr(C)]
503#[derive(Copy, Clone, Pod, Zeroable)]
504struct CayleyEntry {
505 sign: f32,
506 index: u32,
507}
508
509const GEOMETRIC_PRODUCT_SHADER: &str = r#"
511struct CayleyEntry {
512 sign: f32,
513 index: u32,
514}
515
516@group(0) @binding(0)
517var<storage, read> cayley_table: array<CayleyEntry>;
518
519@group(0) @binding(1)
520var<storage, read> a_batch: array<f32>;
521
522@group(0) @binding(2)
523var<storage, read> b_batch: array<f32>;
524
525@group(0) @binding(3)
526var<storage, read_write> output: array<f32>;
527
528const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra
529
530@compute @workgroup_size(1)
531fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
532 let batch_idx = global_id.x;
533 let offset = batch_idx * BASIS_COUNT;
534
535 // Clear output
536 for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
537 output[offset + k] = 0.0;
538 }
539
540 // Compute geometric product
541 for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
542 let a_coeff = a_batch[offset + i];
543 if (abs(a_coeff) < 1e-14) {
544 continue;
545 }
546
547 for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
548 let b_coeff = b_batch[offset + j];
549 if (abs(b_coeff) < 1e-14) {
550 continue;
551 }
552
553 let table_idx = i * BASIS_COUNT + j;
554 let entry = cayley_table[table_idx];
555 output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
556 }
557 }
558}
559"#;
560
561pub struct AdaptiveCompute {
563 gpu: Option<GpuCliffordAlgebra>,
564}
565
566impl AdaptiveCompute {
567 pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
569 let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
570 Self { gpu }
571 }
572
573 pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
575 &self,
576 a: &Multivector<P, Q, R>,
577 b: &Multivector<P, Q, R>,
578 ) -> Multivector<P, Q, R> {
579 a.geometric_product(b)
581 }
582
583 pub async fn batch_geometric_product(
585 &self,
586 a_batch: &[f64],
587 b_batch: &[f64],
588 ) -> Result<Vec<f64>, GpuError> {
589 let batch_size = a_batch.len() / 8; if let Some(gpu) = &self.gpu {
592 if GpuCliffordAlgebra::should_use_gpu(batch_size) {
593 return gpu.batch_geometric_product(a_batch, b_batch).await;
594 }
595 }
596
597 let mut result = Vec::with_capacity(a_batch.len());
599 for i in 0..batch_size {
600 let start = i * 8;
601 let end = start + 8;
602
603 let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
604 let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
605 let product = a.geometric_product(&b);
606
607 for j in 0..8 {
608 result.push(product.get(j));
609 }
610 }
611
612 Ok(result)
613 }
614}
615
616pub struct GpuInfoGeometry {
627 device: wgpu::Device,
628 queue: wgpu::Queue,
629 tensor_pipeline: wgpu::ComputePipeline,
630 #[allow(dead_code)]
631 fisher_pipeline: wgpu::ComputePipeline,
632 #[allow(dead_code)]
633 divergence_pipeline: wgpu::ComputePipeline,
634}
635
636impl GpuInfoGeometry {
637 pub async fn new() -> Result<Self, GpuError> {
639 let instance = wgpu::Instance::default();
640
641 let adapter = if let Some(adapter) = instance
643 .request_adapter(&wgpu::RequestAdapterOptions {
644 power_preference: wgpu::PowerPreference::HighPerformance,
645 compatible_surface: None,
646 force_fallback_adapter: false,
647 })
648 .await
649 {
650 adapter
651 } else if let Some(adapter) = instance
652 .request_adapter(&wgpu::RequestAdapterOptions {
653 power_preference: wgpu::PowerPreference::LowPower,
654 compatible_surface: None,
655 force_fallback_adapter: false,
656 })
657 .await
658 {
659 adapter
660 } else if let Some(adapter) = instance
661 .request_adapter(&wgpu::RequestAdapterOptions {
662 power_preference: wgpu::PowerPreference::None,
663 compatible_surface: None,
664 force_fallback_adapter: true,
665 })
666 .await
667 {
668 adapter
669 } else {
670 return Err(GpuError::InitializationError(
671 "No GPU adapter found".to_string(),
672 ));
673 };
674
675 let (device, queue) = adapter
676 .request_device(
677 &wgpu::DeviceDescriptor {
678 label: Some("Amari GPU Info Geometry Device"),
679 required_features: wgpu::Features::empty(),
680 required_limits: wgpu::Limits::default(),
681 },
682 None,
683 )
684 .await
685 .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;
686
687 let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
689 let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
690 let divergence_pipeline = Self::create_divergence_pipeline(&device)?;
691
692 Ok(Self {
693 device,
694 queue,
695 tensor_pipeline,
696 fisher_pipeline,
697 divergence_pipeline,
698 })
699 }
700
701 pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
703 let (power_preference, force_fallback) = match device_type {
704 "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
705 "low-power" => (wgpu::PowerPreference::LowPower, false),
706 "fallback" => (wgpu::PowerPreference::None, true),
707 _ => {
708 return Err(GpuError::InitializationError(
709 "Invalid device type".to_string(),
710 ))
711 }
712 };
713
714 let instance = wgpu::Instance::default();
715
716 let adapter = instance
717 .request_adapter(&wgpu::RequestAdapterOptions {
718 power_preference,
719 compatible_surface: None,
720 force_fallback_adapter: force_fallback,
721 })
722 .await
723 .ok_or_else(|| {
724 GpuError::InitializationError("No suitable adapter found".to_string())
725 })?;
726
727 let (device, queue) = adapter
728 .request_device(
729 &wgpu::DeviceDescriptor {
730 label: Some("Amari GPU Info Geometry Device"),
731 required_features: wgpu::Features::empty(),
732 required_limits: wgpu::Limits::default(),
733 },
734 None,
735 )
736 .await
737 .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;
738
739 let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
740 let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
741 let divergence_pipeline = Self::create_divergence_pipeline(&device)?;
742
743 Ok(Self {
744 device,
745 queue,
746 tensor_pipeline,
747 fisher_pipeline,
748 divergence_pipeline,
749 })
750 }
751
752 pub async fn amari_chentsov_tensor(
754 &self,
755 x: &Multivector<3, 0, 0>,
756 y: &Multivector<3, 0, 0>,
757 z: &Multivector<3, 0, 0>,
758 ) -> Result<f64, GpuError> {
759 Ok(amari_chentsov_tensor(x, y, z))
761 }
762
763 pub async fn amari_chentsov_tensor_batch(
774 &self,
775 x_batch: &[Multivector<3, 0, 0>],
776 y_batch: &[Multivector<3, 0, 0>],
777 z_batch: &[Multivector<3, 0, 0>],
778 ) -> Result<Vec<f64>, GpuError> {
779 let batch_size = x_batch.len();
780 if batch_size == 0 {
781 return Ok(Vec::new());
782 }
783
784 if batch_size < 100 {
786 let results = x_batch
787 .iter()
788 .zip(y_batch.iter())
789 .zip(z_batch.iter())
790 .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
791 .collect();
792 return Ok(results);
793 }
794
795 let results = x_batch
799 .iter()
800 .zip(y_batch.iter())
801 .zip(z_batch.iter())
802 .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
803 .collect();
804 Ok(results)
805 }
806
807 pub async fn amari_chentsov_tensor_from_typed_arrays(
809 &self,
810 flat_data: &[f64],
811 batch_size: usize,
812 ) -> Result<Vec<f64>, GpuError> {
813 if flat_data.len() != batch_size * 9 {
814 return Err(GpuError::BufferError("Invalid flat data size".to_string()));
815 }
816
817 let mut x_batch = Vec::with_capacity(batch_size);
819 let mut y_batch = Vec::with_capacity(batch_size);
820 let mut z_batch = Vec::with_capacity(batch_size);
821
822 for i in 0..batch_size {
823 let base = i * 9;
824 let mut x = Multivector::zero();
825 let mut y = Multivector::zero();
826 let mut z = Multivector::zero();
827
828 x.set_vector_component(0, flat_data[base]);
830 x.set_vector_component(1, flat_data[base + 1]);
831 x.set_vector_component(2, flat_data[base + 2]);
832
833 y.set_vector_component(0, flat_data[base + 3]);
834 y.set_vector_component(1, flat_data[base + 4]);
835 y.set_vector_component(2, flat_data[base + 5]);
836
837 z.set_vector_component(0, flat_data[base + 6]);
838 z.set_vector_component(1, flat_data[base + 7]);
839 z.set_vector_component(2, flat_data[base + 8]);
840
841 x_batch.push(x);
842 y_batch.push(y);
843 z_batch.push(z);
844 }
845
846 self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
847 .await
848 }
849
850 pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
852 Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
853 }
854
855 pub async fn memory_usage(&self) -> Result<u64, GpuError> {
857 Ok(1024 * 1024) }
860
861 pub async fn fisher_information_matrix(
863 &self,
864 _parameters: &[f64],
865 ) -> Result<GpuFisherMatrix, GpuError> {
866 Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
868 }
869
870 pub async fn bregman_divergence_batch(
872 &self,
873 p_batch: &[Vec<f64>],
874 q_batch: &[Vec<f64>],
875 ) -> Result<Vec<f64>, GpuError> {
876 let results = p_batch
878 .iter()
879 .zip(q_batch.iter())
880 .map(|(p, q)| {
881 p.iter()
883 .zip(q.iter())
884 .map(|(pi, qi)| {
885 if *pi > 0.0 && *qi > 0.0 {
886 pi * (pi / qi).ln()
887 } else {
888 0.0
889 }
890 })
891 .sum()
892 })
893 .collect();
894 Ok(results)
895 }
896
897 #[allow(dead_code)] async fn compute_tensor_batch_gpu(
909 &self,
910 x_batch: &[Multivector<3, 0, 0>],
911 y_batch: &[Multivector<3, 0, 0>],
912 z_batch: &[Multivector<3, 0, 0>],
913 ) -> Result<Vec<f64>, GpuError> {
914 let batch_size = x_batch.len();
915
916 let x_data: Vec<f32> = x_batch
918 .iter()
919 .flat_map(|mv| {
920 vec![
921 mv.vector_component(0) as f32,
922 mv.vector_component(1) as f32,
923 mv.vector_component(2) as f32,
924 ]
925 })
926 .collect();
927
928 let y_data: Vec<f32> = y_batch
929 .iter()
930 .flat_map(|mv| {
931 vec![
932 mv.vector_component(0) as f32,
933 mv.vector_component(1) as f32,
934 mv.vector_component(2) as f32,
935 ]
936 })
937 .collect();
938
939 let z_data: Vec<f32> = z_batch
940 .iter()
941 .flat_map(|mv| {
942 vec![
943 mv.vector_component(0) as f32,
944 mv.vector_component(1) as f32,
945 mv.vector_component(2) as f32,
946 ]
947 })
948 .collect();
949
950 let x_buffer = self
952 .device
953 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
954 label: Some("X Batch Buffer"),
955 contents: bytemuck::cast_slice(&x_data),
956 usage: wgpu::BufferUsages::STORAGE,
957 });
958
959 let y_buffer = self
960 .device
961 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
962 label: Some("Y Batch Buffer"),
963 contents: bytemuck::cast_slice(&y_data),
964 usage: wgpu::BufferUsages::STORAGE,
965 });
966
967 let z_buffer = self
968 .device
969 .create_buffer_init(&wgpu::util::BufferInitDescriptor {
970 label: Some("Z Batch Buffer"),
971 contents: bytemuck::cast_slice(&z_data),
972 usage: wgpu::BufferUsages::STORAGE,
973 });
974
975 let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
976 label: Some("Output Buffer"),
977 size: (batch_size * 4) as u64, usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
979 mapped_at_creation: false,
980 });
981
982 let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
983 label: Some("Staging Buffer"),
984 size: (batch_size * 4) as u64,
985 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
986 mapped_at_creation: false,
987 });
988
989 let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
991 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
992 label: Some("Tensor Compute Bind Group"),
993 layout: &bind_group_layout,
994 entries: &[
995 wgpu::BindGroupEntry {
996 binding: 0,
997 resource: x_buffer.as_entire_binding(),
998 },
999 wgpu::BindGroupEntry {
1000 binding: 1,
1001 resource: y_buffer.as_entire_binding(),
1002 },
1003 wgpu::BindGroupEntry {
1004 binding: 2,
1005 resource: z_buffer.as_entire_binding(),
1006 },
1007 wgpu::BindGroupEntry {
1008 binding: 3,
1009 resource: output_buffer.as_entire_binding(),
1010 },
1011 ],
1012 });
1013
1014 let mut encoder = self
1016 .device
1017 .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1018 label: Some("Tensor Compute Encoder"),
1019 });
1020
1021 {
1022 let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1023 label: Some("Tensor Compute Pass"),
1024 timestamp_writes: None,
1025 });
1026 compute_pass.set_pipeline(&self.tensor_pipeline);
1027 compute_pass.set_bind_group(0, &bind_group, &[]);
1028 let workgroup_count = batch_size.div_ceil(64); compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
1030 }
1031
1032 encoder.copy_buffer_to_buffer(
1033 &output_buffer,
1034 0,
1035 &staging_buffer,
1036 0,
1037 (batch_size * 4) as u64,
1038 );
1039
1040 self.queue.submit(std::iter::once(encoder.finish()));
1041
1042 let buffer_slice = staging_buffer.slice(..);
1044 let (sender, receiver) = futures::channel::oneshot::channel();
1045 buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
1046 let _ = sender.send(result);
1047 });
1048
1049 self.device.poll(wgpu::Maintain::Wait);
1050
1051 receiver
1052 .await
1053 .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
1054 .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;
1055
1056 let data = buffer_slice.get_mapped_range();
1057 let result_f32: &[f32] = bytemuck::cast_slice(&data);
1058 let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();
1059
1060 drop(data);
1061 staging_buffer.unmap();
1062
1063 Ok(results)
1064 }
1065
1066 fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
1067 let shader_source = TENSOR_COMPUTE_SHADER;
1068 let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
1069 label: Some("Tensor Compute Shader"),
1070 source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
1071 });
1072
1073 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1074 label: Some("Tensor Compute Pipeline"),
1075 layout: None,
1076 module: &shader,
1077 entry_point: "main",
1078 });
1079
1080 Ok(compute_pipeline)
1081 }
1082
1083 fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
1084 Self::create_tensor_pipeline(device)
1086 }
1087
1088 fn create_divergence_pipeline(
1089 device: &wgpu::Device,
1090 ) -> Result<wgpu::ComputePipeline, GpuError> {
1091 Self::create_tensor_pipeline(device)
1093 }
1094}
1095
1096pub struct GpuDeviceInfo {
1098 is_gpu: bool,
1099 #[allow(dead_code)]
1100 description: String,
1101}
1102
1103impl GpuDeviceInfo {
1104 fn new(is_gpu: bool, description: &str) -> Self {
1105 Self {
1106 is_gpu,
1107 description: description.to_string(),
1108 }
1109 }
1110
1111 pub fn is_gpu(&self) -> bool {
1112 self.is_gpu
1113 }
1114
1115 pub fn supports_webgpu(&self) -> bool {
1116 self.is_gpu
1117 }
1118
1119 pub fn is_initialized(&self) -> bool {
1120 true
1121 }
1122}
1123
1124pub struct GpuFisherMatrix {
1126 matrix: Vec<Vec<f64>>,
1127}
1128
1129impl GpuFisherMatrix {
1130 fn new(matrix: Vec<Vec<f64>>) -> Self {
1131 Self { matrix }
1132 }
1133
1134 pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
1135 let mut eigenvals = Vec::new();
1137 for i in 0..self.matrix.len() {
1138 if i < self.matrix[i].len() {
1139 eigenvals.push(self.matrix[i][i]);
1140 }
1141 }
1142 Ok(eigenvals)
1143 }
1144}
1145
1146const TENSOR_COMPUTE_SHADER: &str = r#"
1148@group(0) @binding(0)
1149var<storage, read> x_batch: array<vec3<f32>>;
1150
1151@group(0) @binding(1)
1152var<storage, read> y_batch: array<vec3<f32>>;
1153
1154@group(0) @binding(2)
1155var<storage, read> z_batch: array<vec3<f32>>;
1156
1157@group(0) @binding(3)
1158var<storage, read_write> output: array<f32>;
1159
1160@compute @workgroup_size(64)
1161fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
1162 let idx = global_id.x;
1163 if (idx >= arrayLength(&x_batch)) {
1164 return;
1165 }
1166
1167 let x = x_batch[idx];
1168 let y = y_batch[idx];
1169 let z = z_batch[idx];
1170
1171 // Compute scalar triple product: x · (y × z)
1172 let cross_yz = cross(y, z);
1173 let scalar_triple = dot(x, cross_yz);
1174
1175 output[idx] = scalar_triple;
1176}
1177"#;
1178
1179#[cfg(test)]
1180mod tests {
1181 use super::*;
1182
1183 #[test]
1184 fn test_should_use_gpu() {
1185 assert!(!GpuCliffordAlgebra::should_use_gpu(10));
1186 assert!(GpuCliffordAlgebra::should_use_gpu(1000));
1187 }
1188
1189 #[test]
1190 fn test_should_use_gpu_threshold() {
1191 assert!(!GpuCliffordAlgebra::should_use_gpu(99));
1193 assert!(GpuCliffordAlgebra::should_use_gpu(100));
1194 assert!(GpuCliffordAlgebra::should_use_gpu(101));
1195 }
1196
1197 #[test]
1198 fn test_should_use_gpu_zero_batch() {
1199 assert!(!GpuCliffordAlgebra::should_use_gpu(0));
1200 }
1201
1202 #[test]
1203 fn test_should_use_gpu_single_element() {
1204 assert!(!GpuCliffordAlgebra::should_use_gpu(1));
1205 }
1206
1207 #[test]
1208 fn test_gpu_error_display() {
1209 let init_err = GpuError::InitializationError("No GPU found".to_string());
1210 assert!(init_err.to_string().contains("Failed to initialize GPU"));
1211
1212 let buffer_err = GpuError::BufferError("Buffer too small".to_string());
1213 assert!(buffer_err.to_string().contains("buffer error"));
1214
1215 let shader_err = GpuError::ShaderError("Invalid syntax".to_string());
1216 assert!(shader_err.to_string().contains("Shader compilation"));
1217 }
1218
1219 #[test]
1220 fn test_gpu_error_debug() {
1221 let err = GpuError::InitializationError("test".to_string());
1222 let debug_str = format!("{:?}", err);
1223 assert!(debug_str.contains("InitializationError"));
1224 }
1225
1226 #[test]
1227 fn test_gpu_error_variants() {
1228 let errors = vec![
1230 GpuError::InitializationError("init failed".to_string()),
1231 GpuError::BufferError("buffer failed".to_string()),
1232 GpuError::ShaderError("shader failed".to_string()),
1233 ];
1234
1235 for err in errors {
1236 let _ = err.to_string();
1238 let _ = format!("{:?}", err);
1240 }
1241 }
1242
1243 #[tokio::test]
1244 #[ignore = "GPU hardware required, may fail in CI/CD environments"]
1245 async fn test_adaptive_compute_geometric_product() {
1246 let adaptive = AdaptiveCompute::new::<3, 0, 0>().await;
1248
1249 let e1 = Multivector::<3, 0, 0>::basis_vector(0);
1250 let e2 = Multivector::<3, 0, 0>::basis_vector(1);
1251
1252 let result = adaptive.geometric_product(&e1, &e2).await;
1254 assert!(result.magnitude() > 0.0);
1256 }
1257
1258 #[tokio::test]
1259 #[ignore = "GPU hardware required, may fail in CI/CD environments"]
1260 async fn test_adaptive_compute_batch_small() {
1261 let adaptive = AdaptiveCompute::new::<3, 0, 0>().await;
1264
1265 let e1_coeffs = vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let e2_coeffs = vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let mut a_batch = Vec::new();
1271 let mut b_batch = Vec::new();
1272 a_batch.extend_from_slice(&e1_coeffs);
1273 a_batch.extend_from_slice(&e1_coeffs);
1274 b_batch.extend_from_slice(&e2_coeffs);
1275 b_batch.extend_from_slice(&e2_coeffs);
1276
1277 let result = adaptive.batch_geometric_product(&a_batch, &b_batch).await;
1278 assert!(result.is_ok());
1279 assert_eq!(result.unwrap().len(), 16);
1281 }
1282
1283 #[test]
1284 fn test_gpu_device_info_methods() {
1285 let info = GpuDeviceInfo::new(true, "Test GPU");
1286
1287 assert!(info.is_gpu());
1288 assert!(info.supports_webgpu());
1290 assert!(info.is_initialized());
1291 }
1292
1293 #[test]
1294 fn test_gpu_device_info_cpu() {
1295 let info = GpuDeviceInfo::new(false, "CPU Fallback");
1296
1297 assert!(!info.is_gpu());
1298 assert!(!info.supports_webgpu());
1299 assert!(info.is_initialized());
1300 }
1301
1302 #[tokio::test]
1303 async fn test_gpu_info_geometry_creation() {
1304 if std::env::var("CI").is_ok()
1306 || std::env::var("GITHUB_ACTIONS").is_ok()
1307 || std::env::var("DISPLAY").is_err()
1308 {
1309 println!("Skipping GPU test in CI environment");
1310 return;
1311 }
1312
1313 match GpuInfoGeometry::new().await {
1315 Ok(_) => {
1316 println!("GPU initialization successful");
1318 }
1319 Err(GpuError::InitializationError(_)) => {
1320 println!("GPU initialization failed - no GPU available");
1322 }
1323 Err(e) => panic!("Unexpected error: {:?}", e),
1324 }
1325 }
1326
1327 #[tokio::test]
1328 async fn test_gpu_fisher_matrix_eigenvalues() {
1329 let matrix = GpuFisherMatrix::new(vec![
1330 vec![1.0, 0.0, 0.0],
1331 vec![0.0, 2.0, 0.0],
1332 vec![0.0, 0.0, 3.0],
1333 ]);
1334 let eigenvalues = matrix.eigenvalues().await.unwrap();
1335 assert_eq!(eigenvalues.len(), 3);
1336 assert_eq!(eigenvalues[0], 1.0);
1337 assert_eq!(eigenvalues[1], 2.0);
1338 assert_eq!(eigenvalues[2], 3.0);
1339 }
1340
1341 #[tokio::test]
1342 async fn test_gpu_fisher_matrix_empty() {
1343 let matrix = GpuFisherMatrix::new(vec![]);
1344 let eigenvalues = matrix.eigenvalues().await.unwrap();
1345 assert!(eigenvalues.is_empty());
1346 }
1347
1348 #[tokio::test]
1349 async fn test_gpu_fisher_matrix_single_element() {
1350 let matrix = GpuFisherMatrix::new(vec![vec![5.0]]);
1351 let eigenvalues = matrix.eigenvalues().await.unwrap();
1352 assert_eq!(eigenvalues.len(), 1);
1353 assert_eq!(eigenvalues[0], 5.0);
1354 }
1355
1356 #[test]
1357 fn test_cayley_entry_struct() {
1358 let entry = CayleyEntry {
1360 sign: 1.0,
1361 index: 5,
1362 };
1363 assert_eq!(entry.sign, 1.0);
1364 assert_eq!(entry.index, 5);
1365
1366 let entries = vec![
1368 entry,
1369 CayleyEntry {
1370 sign: -1.0,
1371 index: 3,
1372 },
1373 ];
1374 let bytes: &[u8] = bytemuck::cast_slice(&entries);
1375 assert_eq!(bytes.len(), 16); }
1377
1378 #[test]
1379 fn test_cayley_entry_zero() {
1380 let zeroed: CayleyEntry = bytemuck::Zeroable::zeroed();
1382 assert_eq!(zeroed.sign, 0.0);
1383 assert_eq!(zeroed.index, 0);
1384 }
1385
1386 #[test]
1387 fn test_geometric_product_shader_not_empty() {
1388 assert!(!GEOMETRIC_PRODUCT_SHADER.is_empty());
1390 assert!(GEOMETRIC_PRODUCT_SHADER.contains("@compute"));
1391 assert!(GEOMETRIC_PRODUCT_SHADER.contains("@workgroup_size"));
1392 assert!(GEOMETRIC_PRODUCT_SHADER.contains("CayleyEntry"));
1393 }
1394
1395 #[test]
1396 fn test_tensor_compute_shader_not_empty() {
1397 assert!(!TENSOR_COMPUTE_SHADER.is_empty());
1399 assert!(TENSOR_COMPUTE_SHADER.contains("@compute"));
1400 assert!(TENSOR_COMPUTE_SHADER.contains("@workgroup_size"));
1401 assert!(TENSOR_COMPUTE_SHADER.contains("cross"));
1402 assert!(TENSOR_COMPUTE_SHADER.contains("dot"));
1403 }
1404
1405 #[test]
1406 fn test_gpu_error_from_string() {
1407 let msg = "Custom error message";
1409 let err = GpuError::InitializationError(msg.to_string());
1410 assert!(err.to_string().contains(msg));
1411
1412 let err = GpuError::BufferError(msg.to_string());
1413 assert!(err.to_string().contains(msg));
1414
1415 let err = GpuError::ShaderError(msg.to_string());
1416 assert!(err.to_string().contains(msg));
1417 }
1418
1419 #[test]
1420 fn test_should_use_gpu_large_batch() {
1421 assert!(GpuCliffordAlgebra::should_use_gpu(1000));
1423 assert!(GpuCliffordAlgebra::should_use_gpu(10000));
1424 assert!(GpuCliffordAlgebra::should_use_gpu(100000));
1425 }
1426
1427 #[test]
1428 fn test_should_use_gpu_near_threshold() {
1429 for i in 0..100 {
1431 assert!(!GpuCliffordAlgebra::should_use_gpu(i));
1432 }
1433 for i in 100..200 {
1434 assert!(GpuCliffordAlgebra::should_use_gpu(i));
1435 }
1436 }
1437
1438 #[test]
1439 fn test_gpu_device_info_initialized_always_true() {
1440 let gpu_info = GpuDeviceInfo::new(true, "GPU");
1442 let cpu_info = GpuDeviceInfo::new(false, "CPU");
1443 assert!(gpu_info.is_initialized());
1444 assert!(cpu_info.is_initialized());
1445 }
1446
1447 #[tokio::test]
1448 async fn test_adaptive_compute_cpu_fallback_small_batch() {
1449 let adaptive = AdaptiveCompute { gpu: None };
1451
1452 let e1_coeffs = vec![0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1453 let e2_coeffs = vec![0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0];
1454
1455 let result = adaptive
1456 .batch_geometric_product(&e1_coeffs, &e2_coeffs)
1457 .await;
1458 assert!(result.is_ok());
1459 let coeffs = result.unwrap();
1460 assert_eq!(coeffs.len(), 8);
1461 let has_nonzero = coeffs.iter().any(|&c| c.abs() > 0.5);
1463 assert!(has_nonzero, "Product should have non-zero coefficients");
1464 }
1465
1466 #[tokio::test]
1467 async fn test_adaptive_compute_cpu_fallback_multiple_elements() {
1468 let adaptive = AdaptiveCompute { gpu: None };
1470
1471 let mut a_batch = Vec::new();
1472 let mut b_batch = Vec::new();
1473
1474 for _ in 0..5 {
1476 a_batch.extend_from_slice(&[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1477 b_batch.extend_from_slice(&[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
1478 }
1479
1480 let result = adaptive.batch_geometric_product(&a_batch, &b_batch).await;
1481 assert!(result.is_ok());
1482 assert_eq!(result.unwrap().len(), 40); }
1484
1485 #[tokio::test]
1486 async fn test_adaptive_compute_geometric_product_cpu() {
1487 let adaptive = AdaptiveCompute { gpu: None };
1489
1490 let a = Multivector::<3, 0, 0>::scalar(2.0);
1491 let b = Multivector::<3, 0, 0>::scalar(3.0);
1492
1493 let result = adaptive.geometric_product(&a, &b).await;
1494 assert!((result.scalar_part() - 6.0).abs() < 1e-10);
1495 }
1496
1497 #[tokio::test]
1498 async fn test_adaptive_compute_geometric_product_basis_vectors() {
1499 let adaptive = AdaptiveCompute { gpu: None };
1501
1502 let e1 = Multivector::<3, 0, 0>::basis_vector(0);
1503 let e1_clone = Multivector::<3, 0, 0>::basis_vector(0);
1504
1505 let result = adaptive.geometric_product(&e1, &e1_clone).await;
1507 assert!((result.scalar_part() - 1.0).abs() < 1e-10);
1508 }
1509}