pub mod adaptive;
#[cfg(feature = "automata")]
pub mod automata;
pub mod benchmarks;
#[cfg(feature = "calculus")]
pub mod calculus;
#[cfg(feature = "dual")]
pub mod dual;
#[cfg(feature = "enumerative")]
pub mod enumerative;
#[cfg(feature = "holographic")]
pub mod holographic;
#[cfg(feature = "measure")]
pub mod measure;
pub mod multi_gpu;
pub mod network;
pub mod performance;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
#[cfg(feature = "calculus")]
pub use calculus::GpuCalculus;
#[cfg(feature = "holographic")]
pub use holographic::{
    GpuHolographic, GpuHolographicError, GpuHolographicMemory, GpuHolographicResult,
};
#[cfg(feature = "measure")]
pub use measure::{
    GpuIntegrator, GpuMonteCarloIntegrator, GpuMultidimIntegrator, GpuParametricDensity,
    GpuTropicalMeasure,
};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;

#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}

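/// GPU-accelerated Clifford algebra operations built on a wgpu compute
/// pipeline. The Cayley table for the geometric product is generated on the
/// CPU at construction, uploaded once to a storage buffer, and reused for
/// every batch dispatch.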
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
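    /// Initializes a wgpu device and compiles the geometric-product compute
    /// pipeline for the Clifford algebra Cl(P, Q, R). Fails with
    /// `InitializationError` when no adapter is available. Note that the
    /// bundled WGSL shader currently hardcodes `BASIS_COUNT = 8`, so only
    /// three-dimensional algebras produce correct GPU results.
    ///
    /// A minimal usage sketch (illustrative; assumes this crate is named
    /// `amari_gpu` and an async runtime is available):
    ///
    /// ```no_run
    /// # async fn demo() -> Result<(), amari_gpu::GpuError> {
    /// let gpu = amari_gpu::GpuCliffordAlgebra::new::<3, 0, 0>().await?;
    /// let a = vec![1.0; 8 * 128]; // 128 multivectors, 8 coefficients each
    /// let b = vec![2.0; 8 * 128];
    /// let products = gpu.batch_geometric_product(&a, &b).await?;
    /// assert_eq!(products.len(), 8 * 128);
    /// # Ok(())
    /// # }
    /// ```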
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        // Generate the Cayley table on the CPU and upload it to the GPU.
        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        // Compile the WGSL geometric-product shader.
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

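    /// Flattens the CPU-side Cayley table for Cl(P, Q, R) into a row-major
    /// list of (sign, target-index) entries suitable for upload to the GPU.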
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }

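    /// Computes geometric products for a batch of multivector pairs on the
    /// GPU. Both inputs are flat coefficient arrays of length
    /// `batch_size * basis_count`. Coefficients are downcast to f32 for the
    /// shader and widened back to f64 on readback, so results carry f32
    /// precision.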
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        let batch_size = a_batch.len() / self.basis_count;

        // Downcast to f32: the shader operates on 32-bit floats.
        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        // Map the staging buffer and wait for the GPU to finish.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        // Widen results back to f64.
        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }

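    /// Heuristic dispatch threshold: below roughly 100 operations, buffer
    /// setup and readback overhead outweighs the GPU's parallel speedup.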
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

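/// One Cayley-table entry: the sign and target basis-blade index of a
/// basis-blade product. `#[repr(C)]` keeps the layout identical to the WGSL
/// struct of the same name.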
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

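/// Adaptive dispatcher that runs batched geometric products on the GPU when
/// one is available and the workload is large enough, and on the CPU
/// otherwise.
///
/// A minimal usage sketch (illustrative; assumes this crate is named
/// `amari_gpu` and an async runtime is available):
///
/// ```no_run
/// # async fn demo() -> Result<(), amari_gpu::GpuError> {
/// let compute = amari_gpu::AdaptiveCompute::new::<3, 0, 0>().await;
/// let a = vec![0.5; 8 * 256]; // 256 multivectors in Cl(3,0,0)
/// let b = vec![1.5; 8 * 256];
/// let products = compute.batch_geometric_product(&a, &b).await?;
/// assert_eq!(products.len(), 8 * 256);
/// # Ok(())
/// # }
/// ```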
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
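    /// Attempts GPU initialization; on failure, the dispatcher silently
    /// falls back to CPU-only execution.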
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

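    /// Computes a single geometric product.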
    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        // A single product cannot amortize GPU dispatch overhead, so this
        // always computes on the CPU.
        a.geometric_product(b)
    }

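    /// Batched geometric products with adaptive dispatch. The CPU fallback
    /// assumes Cl(3,0,0), i.e. 8 coefficients per multivector.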
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / 8; // Assumes Cl(3,0,0): 8 coefficients per multivector

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback: compute each product with amari_core.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}

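/// GPU-accelerated information geometry operations: Amari-Chentsov tensor
/// evaluation, Fisher information matrices, and Bregman divergences, with
/// CPU fallbacks for small workloads.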
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
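    /// Initializes a device, trying a high-performance adapter first, then a
    /// low-power one, and finally a software fallback adapter.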
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        // Try adapters from most to least capable.
        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

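    /// Initializes a device with an explicit preference: "high-performance",
    /// "low-power", or "fallback" (software adapter).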
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

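    /// Evaluates the Amari-Chentsov tensor for a single triple. Single
    /// evaluations always run on the CPU; GPU dispatch is reserved for
    /// batches.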
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

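    /// Evaluates the Amari-Chentsov tensor for a batch of triples. Batches
    /// under 100 elements are computed directly on the CPU. Larger batches
    /// are intended for the GPU path (`compute_tensor_batch_gpu`), but the
    /// current implementation still evaluates them on the CPU.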
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: CPU evaluation avoids GPU dispatch overhead.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches: the GPU path (compute_tensor_batch_gpu) is not yet
        // wired in, so these are also evaluated on the CPU for now.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }

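    /// Builds multivector triples from a flat array laid out as nine f64
    /// values per triple and evaluates the tensor batch.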
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            // Each triple is packed as [x0, x1, x2, y0, y1, y2, z0, z1, z2].
            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }

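    /// Reports basic device information. Currently returns a static
    /// descriptor rather than querying the adapter.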
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

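    /// Estimates GPU memory usage. Currently returns a fixed 1 MiB
    /// placeholder.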
    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        Ok(1024 * 1024) // Fixed placeholder: 1 MiB
    }

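    /// Computes a Fisher information matrix for the given parameters.
    /// Currently returns a 2x2 identity placeholder.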
    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

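    /// Computes Bregman divergences for a batch of distribution pairs using
    /// the negative-entropy generator, i.e. the KL divergence
    /// `sum_i p_i * ln(p_i / q_i)`. Non-positive entries contribute zero.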
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                // KL divergence term: p_i * ln(p_i / q_i)
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }

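    /// GPU implementation of batched Amari-Chentsov tensor evaluation. Packs
    /// the vector part of each multivector into `vec3<f32>` buffers,
    /// dispatches the tensor compute shader, and reads the results back
    /// through a staging buffer. Not yet called from the public batch API.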
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Extract the vector components (grade-1 part) of each multivector.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

962
963 let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
965 let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
966 label: Some("Tensor Compute Bind Group"),
967 layout: &bind_group_layout,
968 entries: &[
969 wgpu::BindGroupEntry {
970 binding: 0,
971 resource: x_buffer.as_entire_binding(),
972 },
973 wgpu::BindGroupEntry {
974 binding: 1,
975 resource: y_buffer.as_entire_binding(),
976 },
977 wgpu::BindGroupEntry {
978 binding: 2,
979 resource: z_buffer.as_entire_binding(),
980 },
981 wgpu::BindGroupEntry {
982 binding: 3,
983 resource: output_buffer.as_entire_binding(),
984 },
985 ],
986 });
987
988 let mut encoder = self
990 .device
991 .create_command_encoder(&wgpu::CommandEncoderDescriptor {
992 label: Some("Tensor Compute Encoder"),
993 });
994
995 {
996 let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
997 label: Some("Tensor Compute Pass"),
998 timestamp_writes: None,
999 });
1000 compute_pass.set_pipeline(&self.tensor_pipeline);
1001 compute_pass.set_bind_group(0, &bind_group, &[]);
1002 let workgroup_count = batch_size.div_ceil(64); compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
1004 }
1005
1006 encoder.copy_buffer_to_buffer(
1007 &output_buffer,
1008 0,
1009 &staging_buffer,
1010 0,
1011 (batch_size * 4) as u64,
1012 );
1013
1014 self.queue.submit(std::iter::once(encoder.finish()));
1015
1016 let buffer_slice = staging_buffer.slice(..);
1018 let (sender, receiver) = futures::channel::oneshot::channel();
1019 buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
1020 let _ = sender.send(result);
1021 });
1022
1023 self.device.poll(wgpu::Maintain::Wait);
1024
1025 receiver
1026 .await
1027 .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
1028 .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;
1029
1030 let data = buffer_slice.get_mapped_range();
1031 let result_f32: &[f32] = bytemuck::cast_slice(&data);
1032 let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();
1033
1034 drop(data);
1035 staging_buffer.unmap();
1036
1037 Ok(results)
1038 }
1039
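    /// Compiles the WGSL tensor shader and builds its compute pipeline with
    /// an auto-derived layout.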
    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

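    /// Currently reuses the tensor compute pipeline.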
    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }

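    /// Currently reuses the tensor compute pipeline.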
    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }
}

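/// Basic device information reported by [`GpuInfoGeometry::device_info`].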
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

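/// Fisher information matrix handle returned by
/// [`GpuInfoGeometry::fisher_information_matrix`].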
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

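    /// Approximates eigenvalues by the matrix diagonal; this is exact for
    /// diagonal matrices such as the identity placeholder.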
    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

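/// WGSL shader that evaluates the Amari-Chentsov tensor for vector inputs as
/// the scalar triple product x · (y × z).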
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip in headless CI environments where no GPU is available.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        // Either outcome is acceptable: successful initialization, or a
        // clean failure when no adapter exists.
        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}