pub mod adaptive;
#[cfg(feature = "automata")]
pub mod automata;
pub mod benchmarks;
#[cfg(feature = "calculus")]
pub mod calculus;
#[cfg(feature = "dual")]
pub mod dual;
#[cfg(feature = "enumerative")]
pub mod enumerative;
#[cfg(feature = "functional")]
pub mod functional;
#[cfg(feature = "holographic")]
pub mod holographic;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
#[cfg(feature = "probabilistic")]
pub mod probabilistic;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
#[cfg(feature = "topology")]
pub mod topology;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "calculus")]
pub use calculus::GpuCalculus;
#[cfg(feature = "functional")]
pub use functional::{
    AdaptiveFunctionalCompute, GpuFunctionalError, GpuFunctionalResult, GpuHilbertSpace,
    GpuMatrixOperator, GpuSpectralDecomposition,
};
#[cfg(feature = "holographic")]
pub use holographic::{
    GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
    GpuOpticalField,
};
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
#[cfg(feature = "probabilistic")]
pub use probabilistic::{GpuProbabilistic, GpuProbabilisticError, GpuProbabilisticResult};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{
    ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TOPOLOGY_SHADERS, TROPICAL_SHADERS,
};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
#[cfg(feature = "topology")]
pub use topology::{
    AdaptiveTopologyCompute, GpuCriticalPoint, GpuTopology, GpuTopologyError, GpuTopologyResult,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

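/// Errors that can occur while initializing the GPU, managing buffers, or
/// compiling shaders.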
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

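/// GPU-accelerated Clifford algebra operations backed by a wgpu compute pipeline.
/// The Cayley table for the signature `(P, Q, R)` is uploaded once at construction
/// and reused for every batched geometric product.
///
/// A minimal usage sketch (this assumes the crate is built as `amari_gpu` and that
/// a WebGPU-capable adapter is available at runtime):
///
/// ```no_run
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// use amari_gpu::GpuCliffordAlgebra;
///
/// // Euclidean 3D algebra: signature (3, 0, 0), 2^3 = 8 basis blades.
/// let gpu = GpuCliffordAlgebra::new::<3, 0, 0>().await?;
///
/// // One multivector pair per batch entry, 8 coefficients per multivector.
/// let a = vec![1.0_f64; 8];
/// let b = vec![2.0_f64; 8];
/// let product = gpu.batch_geometric_product(&a, &b).await?;
/// assert_eq!(product.len(), 8);
/// # Ok(())
/// # }
/// ```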
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

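    /// Flattens the CPU-side Cayley table into a row-major `Vec<CayleyEntry>` for
    /// upload as a single storage buffer: the product of basis blades `i` and `j`
    /// is stored at `flat_table[i * basis_count + j]`.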
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

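    /// Runs a batch of geometric products on the GPU. Inputs are flat coefficient
    /// arrays with `basis_count` values per multivector (8 for a 3D algebra), and
    /// the output uses the same layout. Coefficients are converted to `f32` for
    /// the shader, so results are effectively single precision.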
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

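    /// Heuristic GPU dispatch threshold: below roughly 100 operations the cost of
    /// buffer setup and readback typically outweighs any GPU speedup, so small
    /// batches stay on the CPU.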
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

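/// WGSL kernel for the batched geometric product. Each invocation handles one
/// multivector pair; `BASIS_COUNT` is hard-coded to 8, so this kernel is specific
/// to 3-dimensional Clifford algebras (2^3 basis blades).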
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

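/// Adaptive CPU/GPU dispatcher for geometric products. GPU initialization is
/// attempted once at construction; if it fails (for example on a headless CI
/// machine), every call transparently falls back to the CPU implementation.
///
/// A minimal sketch of the adaptive path (assuming the crate is built as
/// `amari_gpu`):
///
/// ```no_run
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// use amari_gpu::AdaptiveCompute;
///
/// let compute = AdaptiveCompute::new::<3, 0, 0>().await;
///
/// // Four multivector pairs, 8 coefficients each; small batches run on the CPU,
/// // large ones on the GPU when it is available.
/// let a = vec![0.5_f64; 8 * 4];
/// let b = vec![1.5_f64; 8 * 4];
/// let products = compute.batch_geometric_product(&a, &b).await?;
/// assert_eq!(products.len(), 8 * 4);
/// # Ok(())
/// # }
/// ```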
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // A single product is not worth a GPU round trip; always use the CPU path.
        a.geometric_product(b)
    }

    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        // 8 coefficients per multivector for the 3D algebra handled below.
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback: compute each product with amari-core.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

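/// GPU-backed information geometry primitives: the Amari-Chentsov tensor, Fisher
/// information, and Bregman divergences. Adapter selection falls back from a
/// high-performance device to a low-power one and finally to a software adapter
/// before giving up.
///
/// A batched-tensor sketch (assuming the crate is built as `amari_gpu`; small
/// batches like this one are evaluated on the CPU):
///
/// ```no_run
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// use amari_core::Multivector;
/// use amari_gpu::GpuInfoGeometry;
///
/// let geometry = GpuInfoGeometry::new().await?;
///
/// let x: Vec<Multivector<3, 0, 0>> = (0..16).map(|_| Multivector::zero()).collect();
/// let y: Vec<Multivector<3, 0, 0>> = (0..16).map(|_| Multivector::zero()).collect();
/// let z: Vec<Multivector<3, 0, 0>> = (0..16).map(|_| Multivector::zero()).collect();
/// let tensors = geometry.amari_chentsov_tensor_batch(&x, &y, &z).await?;
/// assert_eq!(tensors.len(), 16);
/// # Ok(())
/// # }
/// ```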
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        // Try progressively less demanding adapters before giving up.
        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        // A single tensor evaluation is cheaper on the CPU than a GPU dispatch.
        Ok(amari_chentsov_tensor(x, y, z))
    }

    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches stay on the CPU.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches are currently also computed on the CPU; the GPU kernel in
        // `compute_tensor_batch_gpu` exists but is not yet wired in here.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

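    /// Computes Amari-Chentsov tensors for a batch packed as one flat `f64` slice,
    /// which is convenient when the data arrives from JavaScript typed arrays.
    /// Each batch element occupies nine consecutive values, the vector components
    /// of x, y, and z in order:
    ///
    /// ```text
    /// [x0, x1, x2, y0, y1, y2, z0, z1, z2]
    /// ```
    ///
    /// so `flat_data.len()` must equal `batch_size * 9`.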
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        // Placeholder: reports a generic WebGPU device description.
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        // Placeholder estimate (1 MiB).
        Ok(1024 * 1024)
    }

    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        // Placeholder: returns a 2x2 identity matrix.
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        // KL-style divergence computed element-wise on the CPU.
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

    /// GPU path for the batched Amari-Chentsov tensor; currently unused by the
    /// public API (see `amari_chentsov_tensor_batch`), but kept for future wiring.
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Pack the vector components of each multivector as three f32 values.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            // One f32 result per batch element.
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // The shader uses a workgroup size of 64, so round up.
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: reuses the tensor pipeline until a dedicated Fisher shader exists.
        Self::create_tensor_pipeline(device)
    }

    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        // Placeholder: reuses the tensor pipeline until a dedicated divergence shader exists.
        Self::create_tensor_pipeline(device)
    }
}

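/// Summary of the selected compute device, as returned by
/// [`GpuInfoGeometry::device_info`].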
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        // Simplified: returns the diagonal entries, which is exact only for
        // diagonal matrices.
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

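/// WGSL kernel for the batched Amari-Chentsov tensor. Each invocation takes the
/// vector parts of one (x, y, z) triple and evaluates the scalar triple product
/// x · (y × z), matching the CPU evaluation for vector-grade inputs.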
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip on CI runners and headless machines, where no adapter is available.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}