1use crate::{Error, Position3D, Result};
7use candle_core::{DType, Device, Tensor};
8use scirs2_core::ndarray::{Array1, Array2, Array3};
9use serde::{Deserialize, Serialize};
10use std::sync::Arc;
11
/// Configuration for GPU device selection and processing behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuConfig {
    /// Prefer a CUDA GPU when one is available; fall back to CPU otherwise.
    pub prefer_gpu: bool,
    /// CUDA device ordinal to use when multiple GPUs are present.
    pub device_id: usize,
    /// Memory budget in bytes. NOTE(review): appears unenforced in this file;
    /// 0 presumably means "no limit" — confirm against callers.
    pub memory_limit: usize,
    /// Number of items processed per batch.
    pub batch_size: usize,
    /// Enable mixed-precision (e.g. f16) computation where supported.
    /// NOTE(review): not consulted by the code visible in this file.
    pub mixed_precision: bool,
}
26
27impl Default for GpuConfig {
28 fn default() -> Self {
29 Self {
30 prefer_gpu: true,
31 device_id: 0,
32 memory_limit: 0, batch_size: 32,
34 mixed_precision: true,
35 }
36 }
37}
38
/// A selected compute device (CUDA GPU or CPU) plus the configuration
/// that produced it.
pub struct GpuDevice {
    // Underlying candle device handle.
    device: Device,
    // Configuration used at construction time.
    config: GpuConfig,
    // Cached result of `matches!(device, Device::Cuda(_))`.
    is_gpu: bool,
}
45
46impl GpuDevice {
47 pub fn new(config: GpuConfig) -> Result<Self> {
49 let device = if config.prefer_gpu {
50 match std::panic::catch_unwind(|| Device::cuda_if_available(config.device_id)) {
51 Ok(Ok(device)) => device,
52 _ => {
53 tracing::warn!("GPU not available, falling back to CPU");
54 Device::Cpu
55 }
56 }
57 } else {
58 Device::Cpu
59 };
60
61 let is_gpu = matches!(device, Device::Cuda(_));
62
63 if is_gpu {
64 tracing::info!(
65 "Using GPU device {} for spatial audio processing",
66 config.device_id
67 );
68 } else {
69 tracing::info!("Using CPU for spatial audio processing");
70 }
71
72 Ok(Self {
73 device,
74 config,
75 is_gpu,
76 })
77 }
78
79 pub fn device(&self) -> &Device {
81 &self.device
82 }
83
84 pub fn is_gpu(&self) -> bool {
86 self.is_gpu
87 }
88
89 pub fn config(&self) -> &GpuConfig {
91 &self.config
92 }
93}
94
/// Convolution engine bound to a [`GpuDevice`].
pub struct GpuConvolution {
    // Device on which tensors are created.
    device: Arc<GpuDevice>,
    // FFT window size. NOTE(review): stored but not used by the current
    // direct-convolution implementation — presumably reserved for a future
    // FFT-based path.
    fft_size: usize,
    // Hop size between processing blocks; also currently unused.
    hop_size: usize,
    // Scratch buffers, never populated by the code in this file; likely
    // placeholders for streaming/overlap-add processing.
    input_buffer: Option<Tensor>,
    output_buffer: Option<Tensor>,
    frequency_domain_buffer: Option<Tensor>,
}
105
106impl GpuConvolution {
107 pub fn new(device: Arc<GpuDevice>, fft_size: usize, hop_size: usize) -> Result<Self> {
109 Ok(Self {
110 device,
111 fft_size,
112 hop_size,
113 input_buffer: None,
114 output_buffer: None,
115 frequency_domain_buffer: None,
116 })
117 }
118
119 pub fn convolve(
121 &mut self,
122 input: &Array1<f32>,
123 impulse_response: &Array1<f32>,
124 ) -> Result<Array1<f32>> {
125 let device = self.device.device();
126
127 let input_slice = input.as_slice().ok_or_else(|| {
129 Error::LegacyProcessing(
130 "Input array is not contiguous in memory, cannot create tensor efficiently"
131 .to_string(),
132 )
133 })?;
134 let input_tensor = Tensor::from_slice(input_slice, input.len(), device)
135 .map_err(|e| Error::LegacyProcessing(format!("Failed to create input tensor: {e}")))?;
136
137 let ir_slice = impulse_response.as_slice().ok_or_else(|| {
138 Error::LegacyProcessing(
139 "Impulse response array is not contiguous in memory, cannot create tensor efficiently"
140 .to_string(),
141 )
142 })?;
143 let ir_tensor = Tensor::from_slice(ir_slice, impulse_response.len(), device)
144 .map_err(|e| Error::LegacyProcessing(format!("Failed to create IR tensor: {e}")))?;
145
146 let result = self.fft_convolve(&input_tensor, &ir_tensor)?;
148
149 let result_vec: Vec<f32> = result.to_vec1().map_err(|e| {
151 Error::LegacyProcessing(format!("Failed to convert result tensor: {e}"))
152 })?;
153
154 Ok(Array1::from_vec(result_vec))
155 }
156
157 #[allow(clippy::single_range_in_vec_init)]
159 fn fft_convolve(&self, input: &Tensor, impulse_response: &Tensor) -> Result<Tensor> {
160 let device = self.device.device();
163
164 let input_len = input
165 .dims1()
166 .map_err(|e| Error::LegacyProcessing(format!("Invalid input dimensions: {e}")))?;
167 let ir_len = impulse_response
168 .dims1()
169 .map_err(|e| Error::LegacyProcessing(format!("Invalid IR dimensions: {e}")))?;
170
171 let output_len = input_len + ir_len - 1;
172
173 let zeros = Tensor::zeros((output_len,), DType::F32, device)
175 .map_err(|e| Error::LegacyProcessing(format!("Failed to create output tensor: {e}")))?;
176
177 let mut result = zeros;
179 for i in 0..input_len {
180 for j in 0..ir_len {
181 let idx = i + j;
182 if idx < output_len {
183 let input_val = input
184 .get(i)
185 .map_err(|e| Error::LegacyProcessing(format!("Input access error: {e}")))?;
186 let ir_val = impulse_response
187 .get(j)
188 .map_err(|e| Error::LegacyProcessing(format!("IR access error: {e}")))?;
189 let current = result.get(idx).map_err(|e| {
190 Error::LegacyProcessing(format!("Result access error: {e}"))
191 })?;
192 let new_val = (current + (input_val * ir_val))?;
193 result = result.slice_assign(&[idx..idx + 1], &new_val)?;
194 }
195 }
196 }
197
198 Ok(result)
199 }
200
201 pub fn convolve_batch(
203 &mut self,
204 inputs: &Array2<f32>,
205 impulse_responses: &Array2<f32>,
206 ) -> Result<Array2<f32>> {
207 let batch_size = inputs.shape()[0];
208 let input_len = inputs.shape()[1];
209 let ir_len = impulse_responses.shape()[1];
210 let output_len = input_len + ir_len - 1;
211
212 let mut results = Array2::zeros((batch_size, output_len));
213
214 for i in 0..batch_size {
216 let input = inputs.row(i).to_owned();
217 let ir = impulse_responses.row(i).to_owned();
218 let result = self.convolve(&input, &ir)?;
219 results.row_mut(i).assign(&result);
220 }
221
222 Ok(results)
223 }
224}
225
/// Batched 3-D vector math (distances, dot products, normalization)
/// executed as tensor operations on the bound device.
pub struct GpuSpatialMath {
    // Device on which all tensors are created.
    device: Arc<GpuDevice>,
}
230
231impl GpuSpatialMath {
232 pub fn new(device: Arc<GpuDevice>) -> Self {
234 Self { device }
235 }
236
237 pub fn calculate_distances(
239 &self,
240 listener_pos: &Position3D,
241 source_positions: &[Position3D],
242 ) -> Result<Array1<f32>> {
243 let device = self.device.device();
244 let num_sources = source_positions.len();
245
246 let listener_tensor = Tensor::from_slice(
248 &[listener_pos.x, listener_pos.y, listener_pos.z],
249 (3,),
250 device,
251 )
252 .map_err(|e| Error::LegacyProcessing(format!("Failed to create listener tensor: {e}")))?;
253
254 let source_data: Vec<f32> = source_positions
255 .iter()
256 .flat_map(|pos| vec![pos.x, pos.y, pos.z])
257 .collect();
258
259 let source_tensor = Tensor::from_slice(&source_data, (num_sources, 3), device)
260 .map_err(|e| Error::LegacyProcessing(format!("Failed to create source tensor: {e}")))?;
261
262 let listener_expanded = listener_tensor.unsqueeze(0)?.expand((num_sources, 3))?;
264
265 let differences = (&source_tensor - &listener_expanded)?;
266
267 let squared_diffs = differences.sqr()?;
269 let distances_squared = squared_diffs.sum(1)?;
270
271 let distances = distances_squared.sqrt()?;
273
274 let result_vec: Vec<f32> = distances
276 .to_vec1()
277 .map_err(|e| Error::LegacyProcessing(format!("Failed to convert distances: {e}")))?;
278
279 Ok(Array1::from_vec(result_vec))
280 }
281
282 pub fn batch_dot_product(
284 &self,
285 vectors_a: &Array2<f32>,
286 vectors_b: &Array2<f32>,
287 ) -> Result<Array1<f32>> {
288 let device = self.device.device();
289
290 if vectors_a.shape() != vectors_b.shape() {
291 return Err(Error::LegacyProcessing(
292 "Vector arrays must have same shape".to_string(),
293 ));
294 }
295
296 let batch_size = vectors_a.shape()[0];
297 let vector_len = vectors_a.shape()[1];
298
299 let slice_a = vectors_a.as_slice().ok_or_else(|| {
301 Error::LegacyProcessing(
302 "Vector array A is not contiguous in memory, cannot create tensor efficiently"
303 .to_string(),
304 )
305 })?;
306 let tensor_a = Tensor::from_slice(slice_a, (batch_size, vector_len), device)
307 .map_err(|e| Error::LegacyProcessing(format!("Failed to create tensor A: {e}")))?;
308
309 let slice_b = vectors_b.as_slice().ok_or_else(|| {
310 Error::LegacyProcessing(
311 "Vector array B is not contiguous in memory, cannot create tensor efficiently"
312 .to_string(),
313 )
314 })?;
315 let tensor_b = Tensor::from_slice(slice_b, (batch_size, vector_len), device)
316 .map_err(|e| Error::LegacyProcessing(format!("Failed to create tensor B: {e}")))?;
317
318 let products = (&tensor_a * &tensor_b)?;
320 let dot_products = products.sum(1)?;
321
322 let result_vec: Vec<f32> = dot_products
324 .to_vec1()
325 .map_err(|e| Error::LegacyProcessing(format!("Failed to convert dot products: {e}")))?;
326
327 Ok(Array1::from_vec(result_vec))
328 }
329
330 pub fn normalize_batch(&self, vectors: &Array2<f32>) -> Result<Array2<f32>> {
332 let device = self.device.device();
333 let batch_size = vectors.shape()[0];
334 let vector_len = vectors.shape()[1];
335
336 let slice = vectors.as_slice().ok_or_else(|| {
338 Error::LegacyProcessing(
339 "Vector array is not contiguous in memory, cannot create tensor efficiently"
340 .to_string(),
341 )
342 })?;
343 let tensor = Tensor::from_slice(slice, (batch_size, vector_len), device)
344 .map_err(|e| Error::LegacyProcessing(format!("Failed to create tensor: {e}")))?;
345
346 let squared = tensor.sqr()?;
348 let magnitudes_squared = squared.sum_keepdim(1)?;
349 let magnitudes = magnitudes_squared.sqrt()?;
350
351 let epsilon = Tensor::from_slice(&[1e-8f32], (1,), device)?.expand((batch_size, 1))?;
353 let safe_magnitudes = magnitudes.maximum(&epsilon)?;
354
355 let normalized = tensor.broadcast_div(&safe_magnitudes)?;
357
358 let result_vec: Vec<f32> = normalized
360 .to_vec2()
361 .map_err(|e| {
362 Error::LegacyProcessing(format!("Failed to convert normalized vectors: {e}"))
363 })?
364 .into_iter()
365 .flatten()
366 .collect();
367
368 let result = Array2::from_shape_vec((batch_size, vector_len), result_vec)
369 .map_err(|e| Error::LegacyProcessing(format!("Failed to reshape result: {e}")))?;
370
371 Ok(result)
372 }
373}
374
/// Ambisonic encoding/decoding on the bound device.
pub struct GpuAmbisonics {
    // Device on which matrices and audio tensors are created.
    device: Arc<GpuDevice>,
    // Ambisonic order; channel count is (order + 1)^2.
    order: u32,
    // Per-source spherical-harmonic coefficients, shape
    // (num_sources, num_channels); populated by
    // `precompute_encoding_matrices`.
    encoding_matrices: Option<Tensor>,
    // Decoding matrices. NOTE(review): never written or read by the code
    // visible in this file.
    decoding_matrices: Option<Tensor>,
}
382
impl GpuAmbisonics {
    /// Create an ambisonics processor of the given `order` on `device`.
    /// Encoding matrices must be computed separately before `encode_batch`.
    pub fn new(device: Arc<GpuDevice>, order: u32) -> Result<Self> {
        Ok(Self {
            device,
            order,
            encoding_matrices: None,
            decoding_matrices: None,
        })
    }

    /// Build the (num_sources, num_channels) spherical-harmonic encoding
    /// matrix for the given source positions and cache it on the device.
    ///
    /// Positions are interpreted relative to the origin: azimuth from
    /// atan2(y, x), elevation from asin(z / distance). Distance itself is
    /// only used to derive elevation — no distance attenuation is applied.
    pub fn precompute_encoding_matrices(&mut self, source_positions: &[Position3D]) -> Result<()> {
        let device = self.device.device();
        let num_sources = source_positions.len();
        // Ambisonic channel count for order N is (N + 1)^2.
        let num_channels = ((self.order + 1) * (self.order + 1)) as usize;

        let mut encoding_data = Vec::with_capacity(num_sources * num_channels);

        for position in source_positions {
            let distance =
                (position.x * position.x + position.y * position.y + position.z * position.z)
                    .sqrt();
            let azimuth = position.y.atan2(position.x);
            // max(1e-8) guards the division for sources at the origin.
            let elevation = (position.z / distance.max(1e-8)).asin();

            // Channels are emitted in (l, m) order: l = 0..=order,
            // m = -l..=l — i.e. ACN channel ordering.
            for l in 0..=self.order {
                for m in -(l as i32)..=(l as i32) {
                    let coeff = self.spherical_harmonic(l, m, azimuth, elevation);
                    encoding_data.push(coeff);
                }
            }
        }

        self.encoding_matrices = Some(
            Tensor::from_slice(&encoding_data, (num_sources, num_channels), device).map_err(
                |e| Error::LegacyProcessing(format!("Failed to create encoding matrices: {e}")),
            )?,
        );

        Ok(())
    }

    /// Real spherical-harmonic coefficient for degree `l`, order `m`.
    ///
    /// NOTE(review): the first-order terms use sin(elevation) for the
    /// horizontal (1, ±1) channels and cos(elevation) for (1, 0). The
    /// common ACN/first-order convention is the opposite (horizontal
    /// channels scale with cos(el), Z with sin(el)) — confirm the intended
    /// convention before reusing these coefficients elsewhere. The unit
    /// test in this file pins the current behavior.
    fn spherical_harmonic(&self, l: u32, m: i32, azimuth: f32, elevation: f32) -> f32 {
        let cos_el = elevation.cos();
        let sin_el = elevation.sin();

        match (l, m) {
            (0, 0) => 1.0,
            (1, -1) => sin_el * azimuth.sin(),
            (1, 0) => cos_el,
            (1, 1) => sin_el * azimuth.cos(),
            // NOTE(review): placeholder for l >= 2 — a constant 0.5 is not a
            // valid spherical harmonic; higher orders are not yet implemented.
            _ => 0.5,
        }
    }

    /// Encode per-source audio (num_sources, num_samples) into ambisonic
    /// channels (num_channels, num_samples) via a single matmul with the
    /// precomputed encoding matrix.
    ///
    /// # Errors
    /// Fails if `precompute_encoding_matrices` has not been called, if the
    /// audio array is non-contiguous, or if a tensor operation fails.
    pub fn encode_batch(&self, audio_samples: &Array2<f32>) -> Result<Array2<f32>> {
        let encoding_matrices = self
            .encoding_matrices
            .as_ref()
            .ok_or_else(|| Error::LegacyProcessing("Encoding matrices not computed".to_string()))?;

        let device = self.device.device();
        let num_sources = audio_samples.shape()[0];
        let num_samples = audio_samples.shape()[1];
        let num_channels = ((self.order + 1) * (self.order + 1)) as usize;

        let audio_slice = audio_samples.as_slice().ok_or_else(|| {
            Error::LegacyProcessing(
                "Audio samples array is not contiguous in memory, cannot create tensor efficiently"
                    .to_string(),
            )
        })?;
        let audio_tensor = Tensor::from_slice(audio_slice, (num_sources, num_samples), device)
            .map_err(|e| Error::LegacyProcessing(format!("Failed to create audio tensor: {e}")))?;

        // (num_channels, num_sources) x (num_sources, num_samples)
        //   -> (num_channels, num_samples)
        let encoding_transposed = encoding_matrices.transpose(0, 1)?;
        let encoded = encoding_transposed.matmul(&audio_tensor)?;

        let result_vec: Vec<f32> = encoded
            .to_vec2()
            .map_err(|e| Error::LegacyProcessing(format!("Failed to convert encoded audio: {e}")))?
            .into_iter()
            .flatten()
            .collect();

        let result =
            Array2::from_shape_vec((num_channels, num_samples), result_vec).map_err(|e| {
                Error::LegacyProcessing(format!("Failed to reshape encoded audio: {e}"))
            })?;

        Ok(result)
    }
}
486
/// Round-robin pool of [`GpuDevice`]s.
pub struct GpuResourceManager {
    // All managed devices; never empty (a CPU fallback is added if needed).
    devices: Vec<Arc<GpuDevice>>,
    // Index of the next device handed out by `get_optimal_device`.
    current_device: usize,
    // Per-device memory usage in bytes. NOTE(review): initialized to 0 and
    // never updated by the code visible in this file.
    memory_usage: Vec<usize>,
}
493
494impl GpuResourceManager {
495 pub fn new(configs: Vec<GpuConfig>) -> Result<Self> {
497 let mut devices = Vec::new();
498 let mut memory_usage = Vec::new();
499
500 for config in configs {
501 let device = Arc::new(GpuDevice::new(config)?);
502 devices.push(device);
503 memory_usage.push(0);
504 }
505
506 if devices.is_empty() {
507 devices.push(Arc::new(GpuDevice::new(GpuConfig {
509 prefer_gpu: false,
510 ..Default::default()
511 })?));
512 memory_usage.push(0);
513 }
514
515 Ok(Self {
516 devices,
517 current_device: 0,
518 memory_usage,
519 })
520 }
521
522 pub fn get_optimal_device(&mut self) -> Arc<GpuDevice> {
524 let device = self.devices[self.current_device].clone();
527 self.current_device = (self.current_device + 1) % self.devices.len();
528 device
529 }
530
531 pub fn get_all_devices(&self) -> &[Arc<GpuDevice>] {
533 &self.devices
534 }
535
536 pub fn device_count(&self) -> usize {
538 self.devices.len()
539 }
540
541 pub fn get_memory_usage(&self, device_id: usize) -> Option<usize> {
543 self.memory_usage.get(device_id).copied()
544 }
545}
546
#[cfg(test)]
mod tests {
    use super::*;

    // Default config should prefer the GPU with batch size 32 and mixed
    // precision on.
    #[test]
    fn test_gpu_config() {
        let config = GpuConfig::default();
        assert!(config.prefer_gpu);
        assert_eq!(config.batch_size, 32);
        assert!(config.mixed_precision);
    }

    // With prefer_gpu = false, construction must succeed and select the CPU.
    #[test]
    fn test_gpu_device_creation() {
        let config = GpuConfig {
            prefer_gpu: false, ..Default::default()
        };
        let device = GpuDevice::new(config).expect("Should successfully create GPU device");
        assert!(!device.is_gpu());
    }

    // Unit-axis sources around an origin listener are all at distance 1.
    #[test]
    fn test_gpu_spatial_math() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device =
            Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"));
        let math = GpuSpatialMath::new(device);

        let listener = Position3D::new(0.0, 0.0, 0.0);
        let sources = vec![
            Position3D::new(1.0, 0.0, 0.0),
            Position3D::new(0.0, 1.0, 0.0),
            Position3D::new(0.0, 0.0, 1.0),
        ];

        let distances = math
            .calculate_distances(&listener, &sources)
            .expect("Should successfully calculate distances");
        assert_eq!(distances.len(), 3);

        for distance in distances.iter() {
            assert!((distance - 1.0).abs() < 1e-6);
        }
    }

    // Identical unit vectors dotted with themselves yield 1.0 per row.
    #[test]
    fn test_batch_dot_product() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device =
            Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"));
        let math = GpuSpatialMath::new(device);

        let vectors_a = Array2::from_shape_vec(
            (2, 3),
            vec![
                1.0, 0.0, 0.0, 0.0, 1.0, 0.0, ],
        )
        .expect("Should successfully create Array2 from shape vec");

        let vectors_b = Array2::from_shape_vec(
            (2, 3),
            vec![
                1.0, 0.0, 0.0, 0.0, 1.0, 0.0, ],
        )
        .expect("Should successfully create Array2 from shape vec");

        let dot_products = math
            .batch_dot_product(&vectors_a, &vectors_b)
            .expect("Should successfully calculate batch dot product");
        assert_eq!(dot_products.len(), 2);

        for &dot_product in dot_products.iter() {
            assert!((dot_product - 1.0).abs() < 1e-6);
        }
    }

    // After normalization every row must have unit magnitude.
    #[test]
    fn test_normalize_batch() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device =
            Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"));
        let math = GpuSpatialMath::new(device);

        let vectors = Array2::from_shape_vec(
            (2, 3),
            vec![
                2.0, 0.0, 0.0, 0.0, 3.0, 0.0, ],
        )
        .expect("Should successfully create Array2 from shape vec");

        let normalized = math
            .normalize_batch(&vectors)
            .expect("Should successfully normalize batch");
        assert_eq!(normalized.shape(), [2, 3]);

        let first_magnitude =
            (normalized[[0, 0]].powi(2) + normalized[[0, 1]].powi(2) + normalized[[0, 2]].powi(2))
                .sqrt();
        let second_magnitude =
            (normalized[[1, 0]].powi(2) + normalized[[1, 1]].powi(2) + normalized[[1, 2]].powi(2))
                .sqrt();

        assert!((first_magnitude - 1.0).abs() < 1e-6);
        assert!((second_magnitude - 1.0).abs() < 1e-6);
    }

    // Construction stores fft_size/hop_size verbatim.
    #[test]
    fn test_gpu_convolution_creation() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device =
            Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"));
        let convolution = GpuConvolution::new(device, 1024, 256)
            .expect("Should successfully create GPU convolution");
        assert_eq!(convolution.fft_size, 1024);
        assert_eq!(convolution.hop_size, 256);
    }

    // One CPU config yields a one-device pool that hands out CPU devices.
    #[test]
    fn test_gpu_resource_manager() {
        let configs = vec![GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        }];

        let mut manager = GpuResourceManager::new(configs)
            .expect("Should successfully create GPU resource manager");
        assert_eq!(manager.device_count(), 1);

        let device = manager.get_optimal_device();
        assert!(!device.is_gpu());
    }

    // Ambisonics processor records the requested order.
    #[test]
    fn test_gpu_ambisonics_creation() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device =
            Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"));
        let ambisonics =
            GpuAmbisonics::new(device, 1).expect("Should successfully create GPU ambisonics");
        assert_eq!(ambisonics.order, 1);
    }

    // Pins the current spherical-harmonic convention: W = 1 and
    // (1, 0) = cos(elevation), so both are 1.0 at azimuth = elevation = 0.
    // NOTE(review): this codifies a convention that differs from the common
    // ACN first-order formulas (Z = sin(elevation)) — see spherical_harmonic.
    #[test]
    fn test_spherical_harmonic_calculation() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device =
            Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"));
        let ambisonics =
            GpuAmbisonics::new(device, 1).expect("Should successfully create GPU ambisonics");

        let coeff = ambisonics.spherical_harmonic(0, 0, 0.0, 0.0);
        assert_eq!(coeff, 1.0);

        let coeff = ambisonics.spherical_harmonic(1, 0, 0.0, 0.0);
        assert_eq!(coeff, 1.0);
    }
}