pub mod adaptive;
pub mod benchmarks;
pub mod multi_gpu;
pub mod network;
pub mod performance;
pub mod relativistic;
pub mod shaders;
pub mod timeline;
pub mod unified;
pub mod verification;

pub use adaptive::{
    AdaptiveVerificationError, AdaptiveVerificationLevel, AdaptiveVerifier, CpuFeatures,
    GpuBackend, PlatformCapabilities, PlatformPerformanceProfile, VerificationPlatform,
    WasmEnvironment,
};
use amari_core::Multivector;
use amari_info_geom::amari_chentsov_tensor;
pub use benchmarks::{
    AmariMultiGpuBenchmarks, BenchmarkConfig, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuiteResults, BenchmarkSummary, ScalingAnalysis,
};
use bytemuck::{Pod, Zeroable};
pub use multi_gpu::{
    ComputeIntensity, DeviceCapabilities, DeviceId, DeviceWorkload, GpuArchitecture, GpuDevice,
    IntelligentLoadBalancer, LoadBalancingStrategy, MultiGpuBarrier, PerformanceRecord,
    PerformanceStats, SynchronizationManager, Workload, WorkloadCoordinator,
};
pub use network::{AdaptiveNetworkCompute, GpuGeometricNetwork, GpuNetworkError, GpuNetworkResult};
pub use performance::{
    AdaptiveDispatchPolicy, CalibrationResult, GpuProfile, GpuProfiler, WorkgroupConfig,
    WorkgroupOptimizer,
};
pub use relativistic::{
    GpuRelativisticParticle, GpuRelativisticPhysics, GpuSpacetimeVector, GpuTrajectoryParams,
};
pub use shaders::{ShaderLibrary, DUAL_SHADERS, FUSION_SHADERS, TROPICAL_SHADERS};
use thiserror::Error;
pub use timeline::{
    BottleneckAnalysis, DeviceUtilizationStats, GpuTimelineAnalyzer, MultiGpuPerformanceMonitor,
    OptimizationRecommendation, PerformanceAnalysisReport, PerformanceBottleneck,
    PerformanceSummary, RecommendationPriority, SynchronizationAnalysis, TimelineEvent,
    UtilizationAnalysis,
};
pub use unified::{
    BufferPoolStats, EnhancedGpuBufferPool, GpuAccelerated, GpuContext, GpuDispatcher,
    GpuOperationParams, GpuParam, MultiGpuStats, PoolEntryStats, SharedGpuContext, UnifiedGpuError,
    UnifiedGpuResult,
};
pub use verification::{
    GpuBoundaryVerifier, GpuVerificationError, RelativisticVerifier, StatisticalGpuVerifier,
    VerificationConfig, VerificationStrategy, VerifiedMultivector,
};
use wgpu::util::DeviceExt;
/// Errors that can occur while initializing the GPU, managing buffers, or
/// compiling shaders.
#[derive(Error, Debug)]
pub enum GpuError {
    #[error("Failed to initialize GPU: {0}")]
    InitializationError(String),

    #[error("GPU buffer error: {0}")]
    BufferError(String),

    #[error("Shader compilation error: {0}")]
    ShaderError(String),
}
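
// A minimal error-handling sketch (not part of the public API): callers are
// expected to treat `InitializationError` as "no usable GPU" and fall back to
// a CPU path, e.g.:
//
//     match GpuCliffordAlgebra::new::<3, 0, 0>().await {
//         Ok(gpu) => { /* dispatch batches to the GPU */ }
//         Err(GpuError::InitializationError(msg)) => {
//             eprintln!("no GPU available ({msg}); using CPU fallback");
//         }
//         Err(e) => panic!("unexpected GPU error: {e}"),
//     }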

/// GPU-accelerated Clifford algebra operations for a signature (P, Q, R),
/// backed by a wgpu compute pipeline and a precomputed Cayley table.
pub struct GpuCliffordAlgebra {
    device: wgpu::Device,
    queue: wgpu::Queue,
    compute_pipeline: wgpu::ComputePipeline,
    cayley_buffer: wgpu::Buffer,
    #[allow(dead_code)]
    dim: usize,
    basis_count: usize,
}

impl GpuCliffordAlgebra {
    /// Initializes a GPU device and builds the geometric-product compute
    /// pipeline for the Clifford algebra Cl(P, Q, R).
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
            .ok_or_else(|| GpuError::InitializationError("No GPU adapter found".to_string()))?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(e.to_string()))?;

        let dim = P + Q + R;
        let basis_count = 1 << dim;

        // Upload the flattened Cayley table once; it is reused by every dispatch.
        let cayley_table = Self::generate_cayley_table::<P, Q, R>();
        let cayley_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("Cayley Table"),
            contents: bytemuck::cast_slice(&cayley_table),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });

        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Geometric Product Shader"),
            source: wgpu::ShaderSource::Wgsl(GEOMETRIC_PRODUCT_SHADER.into()),
        });

        // Bindings 0-2 are read-only inputs (Cayley table, operand batches A
        // and B); binding 3 is the writable output buffer.
        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("Compute Bind Group Layout"),
            entries: &[
                wgpu::BindGroupLayoutEntry {
                    binding: 0,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 1,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 2,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
                wgpu::BindGroupLayoutEntry {
                    binding: 3,
                    visibility: wgpu::ShaderStages::COMPUTE,
                    ty: wgpu::BindingType::Buffer {
                        ty: wgpu::BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: None,
                    },
                    count: None,
                },
            ],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("Compute Pipeline Layout"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Geometric Product Pipeline"),
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: "main",
        });

        Ok(Self {
            device,
            queue,
            compute_pipeline,
            cayley_buffer,
            dim,
            basis_count,
        })
    }

    /// Flattens the Cayley table for Cl(P, Q, R) into a row-major
    /// `basis_count * basis_count` array suitable for a GPU storage buffer.
    fn generate_cayley_table<const P: usize, const Q: usize, const R: usize>() -> Vec<CayleyEntry> {
        use amari_core::cayley::CayleyTable;

        let table = CayleyTable::<P, Q, R>::get();
        let basis_count = 1 << (P + Q + R);
        let mut flat_table = Vec::with_capacity(basis_count * basis_count);

        for i in 0..basis_count {
            for j in 0..basis_count {
                let (sign, index) = table.get_product(i, j);
                flat_table.push(CayleyEntry {
                    sign: sign as f32,
                    index: index as u32,
                });
            }
        }

        flat_table
    }
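
    // Layout sketch: the product of basis blades e_i * e_j is looked up as
    // `flat_table[i * basis_count + j]`, which yields `sign * e[index]`. The
    // WGSL shader below indexes its storage buffer with the same formula
    // (`table_idx = i * BASIS_COUNT + j`).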

    /// Computes geometric products for a batch of multivector pairs on the
    /// GPU. Both inputs are flat arrays of `basis_count` coefficients per
    /// multivector.
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / self.basis_count;

        if a_batch.len() != b_batch.len() {
            return Err(GpuError::BufferError(
                "Input batches must have same size".to_string(),
            ));
        }

        // The shader works in f32; narrow the inputs and widen the results.
        let a_f32: Vec<f32> = a_batch.iter().map(|&x| x as f32).collect();
        let b_f32: Vec<f32> = b_batch.iter().map(|&x| x as f32).collect();

        let a_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("A Buffer"),
                contents: bytemuck::cast_slice(&a_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let b_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("B Buffer"),
                contents: bytemuck::cast_slice(&b_f32),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (a_batch.len() * std::mem::size_of::<f32>()) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.compute_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: self.cayley_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: a_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: b_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Compute Pass"),
                timestamp_writes: None,
            });

            compute_pass.set_pipeline(&self.compute_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            compute_pass.dispatch_workgroups(batch_size as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (a_batch.len() * std::mem::size_of::<f32>()) as u64,
        );

        self.queue.submit(Some(encoder.finish()));

        // Map the staging buffer and block until the GPU has finished.
        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            sender.send(result).unwrap();
        });

        self.device.poll(wgpu::Maintain::Wait);
        receiver
            .await
            .unwrap()
            .map_err(|e| GpuError::BufferError(e.to_string()))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        let result: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(result)
    }
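
    // Usage sketch (assumes a working adapter; coefficient counts must be a
    // multiple of `basis_count`, which is 8 for Cl(3,0,0)):
    //
    //     let gpu = GpuCliffordAlgebra::new::<3, 0, 0>().await?;
    //     let a: Vec<f64> = vec![0.0; 8 * 1000]; // 1000 multivectors
    //     let b: Vec<f64> = vec![0.0; 8 * 1000];
    //     let products = gpu.batch_geometric_product(&a, &b).await?;
    //     assert_eq!(products.len(), 8 * 1000);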

    /// Heuristic dispatch threshold: below roughly 100 operations, GPU setup
    /// and transfer overhead outweighs any speedup, so prefer the CPU.
    pub fn should_use_gpu(operation_count: usize) -> bool {
        operation_count >= 100
    }
}

/// One entry of the flattened Cayley table: the product of two basis blades
/// is `sign * e[index]`. `#[repr(C)]` keeps the layout identical to the
/// `CayleyEntry` struct declared in the WGSL shader.
#[repr(C)]
#[derive(Copy, Clone, Pod, Zeroable)]
struct CayleyEntry {
    sign: f32,
    index: u32,
}

// WGSL kernel for batched geometric products. Note that BASIS_COUNT is
// hardcoded to 8 (a 3-dimensional algebra), so this shader is only correct
// when the host pipeline is built with P + Q + R == 3.
const GEOMETRIC_PRODUCT_SHADER: &str = r#"
struct CayleyEntry {
    sign: f32,
    index: u32,
}

@group(0) @binding(0)
var<storage, read> cayley_table: array<CayleyEntry>;

@group(0) @binding(1)
var<storage, read> a_batch: array<f32>;

@group(0) @binding(2)
var<storage, read> b_batch: array<f32>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

const BASIS_COUNT: u32 = 8u; // For 3D Clifford algebra

@compute @workgroup_size(1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let batch_idx = global_id.x;
    let offset = batch_idx * BASIS_COUNT;

    // Clear output
    for (var k = 0u; k < BASIS_COUNT; k = k + 1u) {
        output[offset + k] = 0.0;
    }

    // Compute geometric product
    for (var i = 0u; i < BASIS_COUNT; i = i + 1u) {
        let a_coeff = a_batch[offset + i];
        if (abs(a_coeff) < 1e-14) {
            continue;
        }

        for (var j = 0u; j < BASIS_COUNT; j = j + 1u) {
            let b_coeff = b_batch[offset + j];
            if (abs(b_coeff) < 1e-14) {
                continue;
            }

            let table_idx = i * BASIS_COUNT + j;
            let entry = cayley_table[table_idx];
            output[offset + entry.index] += entry.sign * a_coeff * b_coeff;
        }
    }
}
"#;

/// Adaptive dispatcher that uses the GPU when available and worthwhile,
/// falling back to CPU computation otherwise.
pub struct AdaptiveCompute {
    gpu: Option<GpuCliffordAlgebra>,
}

impl AdaptiveCompute {
    /// Attempts GPU initialization; on failure the dispatcher silently runs
    /// everything on the CPU.
    pub async fn new<const P: usize, const Q: usize, const R: usize>() -> Self {
        let gpu = GpuCliffordAlgebra::new::<P, Q, R>().await.ok();
        Self { gpu }
    }

    /// Single products always run on the CPU: GPU dispatch overhead only
    /// pays off for batches.
    pub async fn geometric_product<const P: usize, const Q: usize, const R: usize>(
        &self,
        a: &Multivector<P, Q, R>,
        b: &Multivector<P, Q, R>,
    ) -> Multivector<P, Q, R> {
        a.geometric_product(b)
    }

    /// Batched products: dispatches to the GPU for large batches, otherwise
    /// computes on the CPU. Note that both paths currently assume a 3D
    /// algebra (8 coefficients per multivector).
    pub async fn batch_geometric_product(
        &self,
        a_batch: &[f64],
        b_batch: &[f64],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = a_batch.len() / 8;

        if let Some(gpu) = &self.gpu {
            if GpuCliffordAlgebra::should_use_gpu(batch_size) {
                return gpu.batch_geometric_product(a_batch, b_batch).await;
            }
        }

        // CPU fallback: multiply pair by pair.
        let mut result = Vec::with_capacity(a_batch.len());
        for i in 0..batch_size {
            let start = i * 8;
            let end = start + 8;

            let a = Multivector::<3, 0, 0>::from_coefficients(a_batch[start..end].to_vec());
            let b = Multivector::<3, 0, 0>::from_coefficients(b_batch[start..end].to_vec());
            let product = a.geometric_product(&b);

            for j in 0..8 {
                result.push(product.get(j));
            }
        }

        Ok(result)
    }
}
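
// Usage sketch for the adaptive path (small batches run on the CPU even when
// a GPU is present, so this also works on headless machines):
//
//     let compute = AdaptiveCompute::new::<3, 0, 0>().await;
//     let products = compute.batch_geometric_product(&a, &b).await?;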

/// GPU-accelerated information geometry operations (Amari-Chentsov tensors,
/// Fisher information, Bregman divergences).
pub struct GpuInfoGeometry {
    device: wgpu::Device,
    queue: wgpu::Queue,
    tensor_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    fisher_pipeline: wgpu::ComputePipeline,
    #[allow(dead_code)]
    divergence_pipeline: wgpu::ComputePipeline,
}

impl GpuInfoGeometry {
    /// Initializes the device, trying adapters from most to least powerful:
    /// high-performance, then low-power, then a software fallback.
    pub async fn new() -> Result<Self, GpuError> {
        let instance = wgpu::Instance::default();

        let adapter = if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::HighPerformance,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::LowPower,
                compatible_surface: None,
                force_fallback_adapter: false,
            })
            .await
        {
            adapter
        } else if let Some(adapter) = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference: wgpu::PowerPreference::None,
                compatible_surface: None,
                force_fallback_adapter: true,
            })
            .await
        {
            adapter
        } else {
            return Err(GpuError::InitializationError(
                "No GPU adapter found".to_string(),
            ));
        };

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }

    /// Like [`Self::new`], but with an explicit device preference. Accepted
    /// values: "high-performance", "low-power", and "fallback".
    pub async fn new_with_device_preference(device_type: &str) -> Result<Self, GpuError> {
        let (power_preference, force_fallback) = match device_type {
            "high-performance" => (wgpu::PowerPreference::HighPerformance, false),
            "low-power" => (wgpu::PowerPreference::LowPower, false),
            "fallback" => (wgpu::PowerPreference::None, true),
            _ => {
                return Err(GpuError::InitializationError(
                    "Invalid device type".to_string(),
                ))
            }
        };

        let instance = wgpu::Instance::default();

        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions {
                power_preference,
                compatible_surface: None,
                force_fallback_adapter: force_fallback,
            })
            .await
            .ok_or_else(|| {
                GpuError::InitializationError("No suitable adapter found".to_string())
            })?;

        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {
                    label: Some("Amari GPU Info Geometry Device"),
                    required_features: wgpu::Features::empty(),
                    required_limits: wgpu::Limits::default(),
                },
                None,
            )
            .await
            .map_err(|e| GpuError::InitializationError(format!("Device request failed: {}", e)))?;

        let tensor_pipeline = Self::create_tensor_pipeline(&device)?;
        let fisher_pipeline = Self::create_fisher_pipeline(&device)?;
        let divergence_pipeline = Self::create_divergence_pipeline(&device)?;

        Ok(Self {
            device,
            queue,
            tensor_pipeline,
            fisher_pipeline,
            divergence_pipeline,
        })
    }
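
    // Sketch: pinning the computation to a low-power (e.g. integrated) GPU:
    //
    //     let info_geom = GpuInfoGeometry::new_with_device_preference("low-power").await?;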

    /// Evaluates a single Amari-Chentsov tensor. A lone evaluation cannot
    /// amortize GPU dispatch, so this delegates to the CPU implementation.
    pub async fn amari_chentsov_tensor(
        &self,
        x: &Multivector<3, 0, 0>,
        y: &Multivector<3, 0, 0>,
        z: &Multivector<3, 0, 0>,
    ) -> Result<f64, GpuError> {
        Ok(amari_chentsov_tensor(x, y, z))
    }

    /// Evaluates Amari-Chentsov tensors for a batch of multivector triples.
    pub async fn amari_chentsov_tensor_batch(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();
        if batch_size == 0 {
            return Ok(Vec::new());
        }

        // Small batches: CPU evaluation avoids dispatch overhead.
        if batch_size < 100 {
            let results = x_batch
                .iter()
                .zip(y_batch.iter())
                .zip(z_batch.iter())
                .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
                .collect();
            return Ok(results);
        }

        // Large batches currently also run on the CPU; the GPU kernel
        // (`compute_tensor_batch_gpu` below) exists but is not yet wired in.
        let results = x_batch
            .iter()
            .zip(y_batch.iter())
            .zip(z_batch.iter())
            .map(|((x, y), z)| amari_chentsov_tensor(x, y, z))
            .collect();
        Ok(results)
    }
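
    // Call sketch: the three batches are parallel slices of equal length,
    // producing one tensor value per (x, y, z) triple:
    //
    //     let values = info_geom
    //         .amari_chentsov_tensor_batch(&xs, &ys, &zs)
    //         .await?;
    //     assert_eq!(values.len(), xs.len());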

    /// Evaluates a batch of tensors from a flat array of 9 values per triple:
    /// [x0, x1, x2, y0, y1, y2, z0, z1, z2] repeated `batch_size` times.
    pub async fn amari_chentsov_tensor_from_typed_arrays(
        &self,
        flat_data: &[f64],
        batch_size: usize,
    ) -> Result<Vec<f64>, GpuError> {
        if flat_data.len() != batch_size * 9 {
            return Err(GpuError::BufferError("Invalid flat data size".to_string()));
        }

        let mut x_batch = Vec::with_capacity(batch_size);
        let mut y_batch = Vec::with_capacity(batch_size);
        let mut z_batch = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let base = i * 9;
            let mut x = Multivector::zero();
            let mut y = Multivector::zero();
            let mut z = Multivector::zero();

            x.set_vector_component(0, flat_data[base]);
            x.set_vector_component(1, flat_data[base + 1]);
            x.set_vector_component(2, flat_data[base + 2]);

            y.set_vector_component(0, flat_data[base + 3]);
            y.set_vector_component(1, flat_data[base + 4]);
            y.set_vector_component(2, flat_data[base + 5]);

            z.set_vector_component(0, flat_data[base + 6]);
            z.set_vector_component(1, flat_data[base + 7]);
            z.set_vector_component(2, flat_data[base + 8]);

            x_batch.push(x);
            y_batch.push(y);
            z_batch.push(z);
        }

        self.amari_chentsov_tensor_batch(&x_batch, &y_batch, &z_batch)
            .await
    }
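
    // Layout sketch for one triple with x = e1, y = e2, z = e3 (assuming the
    // tensor reduces to the scalar triple product x · (y × z), as in the GPU
    // kernel below, this evaluates to 1):
    //
    //     let flat = [1.0, 0.0, 0.0,  0.0, 1.0, 0.0,  0.0, 0.0, 1.0];
    //     let vals = info_geom
    //         .amari_chentsov_tensor_from_typed_arrays(&flat, 1)
    //         .await?;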

    /// Reports basic information about the active device.
    pub async fn device_info(&self) -> Result<GpuDeviceInfo, GpuError> {
        Ok(GpuDeviceInfo::new(true, "WebGPU Device"))
    }

    /// Estimated GPU memory usage in bytes. Currently a fixed 1 MiB
    /// placeholder.
    pub async fn memory_usage(&self) -> Result<u64, GpuError> {
        Ok(1024 * 1024)
    }

    /// Fisher information matrix for the given parameters. Currently a
    /// placeholder returning the 2x2 identity.
    pub async fn fisher_information_matrix(
        &self,
        _parameters: &[f64],
    ) -> Result<GpuFisherMatrix, GpuError> {
        Ok(GpuFisherMatrix::new(vec![vec![1.0, 0.0], vec![0.0, 1.0]]))
    }

    /// Batched Bregman divergence. The current implementation computes the
    /// discrete KL divergence sum_i p_i ln(p_i / q_i), i.e. the Bregman
    /// divergence generated by negative entropy, skipping non-positive terms.
    pub async fn bregman_divergence_batch(
        &self,
        p_batch: &[Vec<f64>],
        q_batch: &[Vec<f64>],
    ) -> Result<Vec<f64>, GpuError> {
        let results = p_batch
            .iter()
            .zip(q_batch.iter())
            .map(|(p, q)| {
                p.iter()
                    .zip(q.iter())
                    .map(|(pi, qi)| {
                        if *pi > 0.0 && *qi > 0.0 {
                            pi * (pi / qi).ln()
                        } else {
                            0.0
                        }
                    })
                    .sum()
            })
            .collect();
        Ok(results)
    }
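
    // Worked sketch: for p = [0.5, 0.5] and q = [0.25, 0.75] the result is
    // 0.5 ln(0.5/0.25) + 0.5 ln(0.5/0.75) = 0.5 ln 2 - 0.5 ln 1.5 ≈ 0.1438.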

    /// GPU kernel driver for batched Amari-Chentsov tensors (one scalar
    /// triple product per thread). Kept for future use; not yet called by
    /// the public batch API above.
    #[allow(dead_code)]
    async fn compute_tensor_batch_gpu(
        &self,
        x_batch: &[Multivector<3, 0, 0>],
        y_batch: &[Multivector<3, 0, 0>],
        z_batch: &[Multivector<3, 0, 0>],
    ) -> Result<Vec<f64>, GpuError> {
        let batch_size = x_batch.len();

        // Extract the vector (grade-1) components of each multivector.
        let x_data: Vec<f32> = x_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let y_data: Vec<f32> = y_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let z_data: Vec<f32> = z_batch
            .iter()
            .flat_map(|mv| {
                vec![
                    mv.vector_component(0) as f32,
                    mv.vector_component(1) as f32,
                    mv.vector_component(2) as f32,
                ]
            })
            .collect();

        let x_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("X Batch Buffer"),
                contents: bytemuck::cast_slice(&x_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let y_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Y Batch Buffer"),
                contents: bytemuck::cast_slice(&y_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let z_buffer = self
            .device
            .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("Z Batch Buffer"),
                contents: bytemuck::cast_slice(&z_data),
                usage: wgpu::BufferUsages::STORAGE,
            });

        let output_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Output Buffer"),
            size: (batch_size * 4) as u64, // one f32 result per triple
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });

        let staging_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("Staging Buffer"),
            size: (batch_size * 4) as u64,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        let bind_group_layout = self.tensor_pipeline.get_bind_group_layout(0);
        let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("Tensor Compute Bind Group"),
            layout: &bind_group_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: x_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: y_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: z_buffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: output_buffer.as_entire_binding(),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("Tensor Compute Encoder"),
            });

        {
            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("Tensor Compute Pass"),
                timestamp_writes: None,
            });
            compute_pass.set_pipeline(&self.tensor_pipeline);
            compute_pass.set_bind_group(0, &bind_group, &[]);
            // One thread per triple, 64 threads per workgroup (see shader).
            let workgroup_count = batch_size.div_ceil(64);
            compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &output_buffer,
            0,
            &staging_buffer,
            0,
            (batch_size * 4) as u64,
        );

        self.queue.submit(std::iter::once(encoder.finish()));

        let buffer_slice = staging_buffer.slice(..);
        let (sender, receiver) = futures::channel::oneshot::channel();
        buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
            let _ = sender.send(result);
        });

        self.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|_| GpuError::BufferError("Failed to receive buffer map result".to_string()))?
            .map_err(|e| GpuError::BufferError(format!("Buffer mapping failed: {:?}", e)))?;

        let data = buffer_slice.get_mapped_range();
        let result_f32: &[f32] = bytemuck::cast_slice(&data);
        // Results come back as f32; widen to f64 for the public API.
        let results: Vec<f64> = result_f32.iter().map(|&x| x as f64).collect();

        drop(data);
        staging_buffer.unmap();

        Ok(results)
    }

    fn create_tensor_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        let shader_source = TENSOR_COMPUTE_SHADER;
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("Tensor Compute Shader"),
            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(shader_source)),
        });

        // `layout: None` lets wgpu derive the bind group layout from the shader.
        let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: Some("Tensor Compute Pipeline"),
            layout: None,
            module: &shader,
            entry_point: "main",
        });

        Ok(compute_pipeline)
    }

    /// Placeholder: reuses the tensor pipeline until a dedicated Fisher
    /// information shader is implemented.
    fn create_fisher_pipeline(device: &wgpu::Device) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }

    /// Placeholder: reuses the tensor pipeline until a dedicated divergence
    /// shader is implemented.
    fn create_divergence_pipeline(
        device: &wgpu::Device,
    ) -> Result<wgpu::ComputePipeline, GpuError> {
        Self::create_tensor_pipeline(device)
    }
}

/// Summary of the device backing a [`GpuInfoGeometry`] instance.
pub struct GpuDeviceInfo {
    is_gpu: bool,
    #[allow(dead_code)]
    description: String,
}

impl GpuDeviceInfo {
    fn new(is_gpu: bool, description: &str) -> Self {
        Self {
            is_gpu,
            description: description.to_string(),
        }
    }

    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    pub fn supports_webgpu(&self) -> bool {
        self.is_gpu
    }

    pub fn is_initialized(&self) -> bool {
        true
    }
}

/// Fisher information matrix computed on the GPU.
pub struct GpuFisherMatrix {
    matrix: Vec<Vec<f64>>,
}

impl GpuFisherMatrix {
    fn new(matrix: Vec<Vec<f64>>) -> Self {
        Self { matrix }
    }

    /// Returns the diagonal as an eigenvalue estimate. This is exact only
    /// for diagonal matrices (such as the current identity placeholder).
    pub async fn eigenvalues(&self) -> Result<Vec<f64>, GpuError> {
        let mut eigenvals = Vec::new();
        for i in 0..self.matrix.len() {
            if i < self.matrix[i].len() {
                eigenvals.push(self.matrix[i][i]);
            }
        }
        Ok(eigenvals)
    }
}

// WGSL kernel: one thread per triple computes the scalar triple product
// x · (y × z). Caution (an assumption worth verifying before enabling the GPU
// path): in WGSL, `array<vec3<f32>>` in storage has a 16-byte element stride,
// while `compute_tensor_batch_gpu` above uploads tightly packed 12-byte
// triples, so the two layouts would need to be reconciled.
const TENSOR_COMPUTE_SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read> x_batch: array<vec3<f32>>;

@group(0) @binding(1)
var<storage, read> y_batch: array<vec3<f32>>;

@group(0) @binding(2)
var<storage, read> z_batch: array<vec3<f32>>;

@group(0) @binding(3)
var<storage, read_write> output: array<f32>;

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= arrayLength(&x_batch)) {
        return;
    }

    let x = x_batch[idx];
    let y = y_batch[idx];
    let z = z_batch[idx];

    // Compute scalar triple product: x · (y × z)
    let cross_yz = cross(y, z);
    let scalar_triple = dot(x, cross_yz);

    output[idx] = scalar_triple;
}
"#;
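
// Math note: x · (y × z) equals det[x y z], the signed volume of the
// parallelepiped spanned by the three vectors; for vector-grade inputs this
// is expected to match what the CPU path computes via `amari_chentsov_tensor`.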

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_use_gpu() {
        assert!(!GpuCliffordAlgebra::should_use_gpu(10));
        assert!(GpuCliffordAlgebra::should_use_gpu(1000));
    }
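
    // A sketch of a CPU-fallback test (relies on `from_coefficients` and
    // `get` behaving as used in `AdaptiveCompute` above): with a batch below
    // the GPU threshold this exercises the CPU path, so it should pass even
    // on headless machines without a GPU.
    #[tokio::test]
    async fn test_adaptive_cpu_fallback_batch_product() {
        let compute = AdaptiveCompute::new::<3, 0, 0>().await;

        // One pair: the scalars 2 and 3; their geometric product is the scalar 6.
        let mut a = vec![0.0f64; 8];
        let mut b = vec![0.0f64; 8];
        a[0] = 2.0;
        b[0] = 3.0;

        let result = compute
            .batch_geometric_product(&a, &b)
            .await
            .expect("CPU fallback should not fail");
        assert_eq!(result.len(), 8);
        assert!((result[0] - 6.0).abs() < 1e-12);
    }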

    #[tokio::test]
    async fn test_gpu_info_geometry_creation() {
        // Skip in CI and on headless machines where no GPU is available.
        if std::env::var("CI").is_ok()
            || std::env::var("GITHUB_ACTIONS").is_ok()
            || std::env::var("DISPLAY").is_err()
        {
            println!("Skipping GPU test in CI environment");
            return;
        }

        match GpuInfoGeometry::new().await {
            Ok(_) => {
                println!("GPU initialization successful");
            }
            Err(GpuError::InitializationError(_)) => {
                println!("GPU initialization failed - no GPU available");
            }
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
}
1058}