use crate::{Error, Position3D, Result};
use candle_core::{DType, Device, Tensor};
use scirs2_core::ndarray::{Array1, Array2, Array3};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// Configuration controlling device selection and processing parameters for
/// GPU-backed spatial audio.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuConfig {
    /// Prefer a CUDA device when available; `GpuDevice::new` falls back to CPU otherwise.
    pub prefer_gpu: bool,
    /// Index of the CUDA device to select when multiple GPUs are present.
    pub device_id: usize,
    /// Memory limit for GPU allocations (0 in the default config).
    /// NOTE(review): not enforced anywhere in this file — confirm intended use.
    pub memory_limit: usize,
    /// Number of items to process per batch.
    pub batch_size: usize,
    /// Whether mixed-precision processing is enabled.
    /// NOTE(review): not consumed anywhere in this file — confirm intended use.
    pub mixed_precision: bool,
}
impl Default for GpuConfig {
fn default() -> Self {
Self {
prefer_gpu: true,
device_id: 0,
memory_limit: 0, batch_size: 32,
mixed_precision: true,
}
}
}
/// Wrapper around a compute device (CUDA GPU or CPU) used for spatial audio
/// processing.
pub struct GpuDevice {
    // Underlying candle device handle.
    device: Device,
    // Configuration this device was created from.
    config: GpuConfig,
    // Cached flag: true when `device` is a CUDA device.
    is_gpu: bool,
}
impl GpuDevice {
    /// Builds a device according to `config`.
    ///
    /// When `prefer_gpu` is set, tries to acquire the CUDA device with the
    /// configured id; on any failure — including a panic inside the backend —
    /// it falls back to the CPU. With the current fallback strategy creation
    /// always succeeds, but the signature stays fallible.
    pub fn new(config: GpuConfig) -> Result<Self> {
        let device = if !config.prefer_gpu {
            Device::Cpu
        } else {
            // catch_unwind guards against backends that panic instead of
            // returning an error when CUDA is unavailable.
            let attempt =
                std::panic::catch_unwind(|| Device::cuda_if_available(config.device_id));
            match attempt {
                Ok(Ok(cuda)) => cuda,
                _ => {
                    tracing::warn!("GPU not available, falling back to CPU");
                    Device::Cpu
                }
            }
        };
        let is_gpu = matches!(device, Device::Cuda(_));
        if is_gpu {
            tracing::info!(
                "Using GPU device {} for spatial audio processing",
                config.device_id
            );
        } else {
            tracing::info!("Using CPU for spatial audio processing");
        }
        Ok(Self {
            device,
            config,
            is_gpu,
        })
    }

    /// Borrow of the underlying candle device.
    pub fn device(&self) -> &Device {
        &self.device
    }

    /// True when this device is backed by CUDA.
    pub fn is_gpu(&self) -> bool {
        self.is_gpu
    }

    /// The configuration this device was created with.
    pub fn config(&self) -> &GpuConfig {
        &self.config
    }
}
/// Convolution engine for applying impulse responses on a [`GpuDevice`].
pub struct GpuConvolution {
    // Device on which tensors are created.
    device: Arc<GpuDevice>,
    // FFT block size. NOTE(review): stored but not used by the current
    // direct-convolution implementation — confirm planned FFT use.
    fft_size: usize,
    // Hop size between blocks. NOTE(review): currently unused.
    hop_size: usize,
    // Reusable buffers, presumably for future streaming/overlap-add
    // processing; never populated in this file.
    input_buffer: Option<Tensor>,
    output_buffer: Option<Tensor>,
    frequency_domain_buffer: Option<Tensor>,
}
impl GpuConvolution {
pub fn new(device: Arc<GpuDevice>, fft_size: usize, hop_size: usize) -> Result<Self> {
Ok(Self {
device,
fft_size,
hop_size,
input_buffer: None,
output_buffer: None,
frequency_domain_buffer: None,
})
}
pub fn convolve(
&mut self,
input: &Array1<f32>,
impulse_response: &Array1<f32>,
) -> Result<Array1<f32>> {
let device = self.device.device();
let input_slice = input.as_slice().ok_or_else(|| {
Error::LegacyProcessing(
"Input array is not contiguous in memory, cannot create tensor efficiently"
.to_string(),
)
})?;
let input_tensor = Tensor::from_slice(input_slice, input.len(), device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create input tensor: {e}")))?;
let ir_slice = impulse_response.as_slice().ok_or_else(|| {
Error::LegacyProcessing(
"Impulse response array is not contiguous in memory, cannot create tensor efficiently"
.to_string(),
)
})?;
let ir_tensor = Tensor::from_slice(ir_slice, impulse_response.len(), device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create IR tensor: {e}")))?;
let result = self.fft_convolve(&input_tensor, &ir_tensor)?;
let result_vec: Vec<f32> = result.to_vec1().map_err(|e| {
Error::LegacyProcessing(format!("Failed to convert result tensor: {e}"))
})?;
Ok(Array1::from_vec(result_vec))
}
#[allow(clippy::single_range_in_vec_init)]
fn fft_convolve(&self, input: &Tensor, impulse_response: &Tensor) -> Result<Tensor> {
let device = self.device.device();
let input_len = input
.dims1()
.map_err(|e| Error::LegacyProcessing(format!("Invalid input dimensions: {e}")))?;
let ir_len = impulse_response
.dims1()
.map_err(|e| Error::LegacyProcessing(format!("Invalid IR dimensions: {e}")))?;
let output_len = input_len + ir_len - 1;
let zeros = Tensor::zeros((output_len,), DType::F32, device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create output tensor: {e}")))?;
let mut result = zeros;
for i in 0..input_len {
for j in 0..ir_len {
let idx = i + j;
if idx < output_len {
let input_val = input
.get(i)
.map_err(|e| Error::LegacyProcessing(format!("Input access error: {e}")))?;
let ir_val = impulse_response
.get(j)
.map_err(|e| Error::LegacyProcessing(format!("IR access error: {e}")))?;
let current = result.get(idx).map_err(|e| {
Error::LegacyProcessing(format!("Result access error: {e}"))
})?;
let new_val = (current + (input_val * ir_val))?;
result = result.slice_assign(&[idx..idx + 1], &new_val)?;
}
}
}
Ok(result)
}
pub fn convolve_batch(
&mut self,
inputs: &Array2<f32>,
impulse_responses: &Array2<f32>,
) -> Result<Array2<f32>> {
let batch_size = inputs.shape()[0];
let input_len = inputs.shape()[1];
let ir_len = impulse_responses.shape()[1];
let output_len = input_len + ir_len - 1;
let mut results = Array2::zeros((batch_size, output_len));
for i in 0..batch_size {
let input = inputs.row(i).to_owned();
let ir = impulse_responses.row(i).to_owned();
let result = self.convolve(&input, &ir)?;
results.row_mut(i).assign(&result);
}
Ok(results)
}
}
/// Batched 3-D vector math (distances, dot products, normalization) executed
/// as tensor operations on a [`GpuDevice`].
pub struct GpuSpatialMath {
    // Device on which all tensors are created.
    device: Arc<GpuDevice>,
}
impl GpuSpatialMath {
pub fn new(device: Arc<GpuDevice>) -> Self {
Self { device }
}
pub fn calculate_distances(
&self,
listener_pos: &Position3D,
source_positions: &[Position3D],
) -> Result<Array1<f32>> {
let device = self.device.device();
let num_sources = source_positions.len();
let listener_tensor = Tensor::from_slice(
&[listener_pos.x, listener_pos.y, listener_pos.z],
(3,),
device,
)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create listener tensor: {e}")))?;
let source_data: Vec<f32> = source_positions
.iter()
.flat_map(|pos| vec![pos.x, pos.y, pos.z])
.collect();
let source_tensor = Tensor::from_slice(&source_data, (num_sources, 3), device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create source tensor: {e}")))?;
let listener_expanded = listener_tensor.unsqueeze(0)?.expand((num_sources, 3))?;
let differences = (&source_tensor - &listener_expanded)?;
let squared_diffs = differences.sqr()?;
let distances_squared = squared_diffs.sum(1)?;
let distances = distances_squared.sqrt()?;
let result_vec: Vec<f32> = distances
.to_vec1()
.map_err(|e| Error::LegacyProcessing(format!("Failed to convert distances: {e}")))?;
Ok(Array1::from_vec(result_vec))
}
pub fn batch_dot_product(
&self,
vectors_a: &Array2<f32>,
vectors_b: &Array2<f32>,
) -> Result<Array1<f32>> {
let device = self.device.device();
if vectors_a.shape() != vectors_b.shape() {
return Err(Error::LegacyProcessing(
"Vector arrays must have same shape".to_string(),
));
}
let batch_size = vectors_a.shape()[0];
let vector_len = vectors_a.shape()[1];
let slice_a = vectors_a.as_slice().ok_or_else(|| {
Error::LegacyProcessing(
"Vector array A is not contiguous in memory, cannot create tensor efficiently"
.to_string(),
)
})?;
let tensor_a = Tensor::from_slice(slice_a, (batch_size, vector_len), device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create tensor A: {e}")))?;
let slice_b = vectors_b.as_slice().ok_or_else(|| {
Error::LegacyProcessing(
"Vector array B is not contiguous in memory, cannot create tensor efficiently"
.to_string(),
)
})?;
let tensor_b = Tensor::from_slice(slice_b, (batch_size, vector_len), device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create tensor B: {e}")))?;
let products = (&tensor_a * &tensor_b)?;
let dot_products = products.sum(1)?;
let result_vec: Vec<f32> = dot_products
.to_vec1()
.map_err(|e| Error::LegacyProcessing(format!("Failed to convert dot products: {e}")))?;
Ok(Array1::from_vec(result_vec))
}
pub fn normalize_batch(&self, vectors: &Array2<f32>) -> Result<Array2<f32>> {
let device = self.device.device();
let batch_size = vectors.shape()[0];
let vector_len = vectors.shape()[1];
let slice = vectors.as_slice().ok_or_else(|| {
Error::LegacyProcessing(
"Vector array is not contiguous in memory, cannot create tensor efficiently"
.to_string(),
)
})?;
let tensor = Tensor::from_slice(slice, (batch_size, vector_len), device)
.map_err(|e| Error::LegacyProcessing(format!("Failed to create tensor: {e}")))?;
let squared = tensor.sqr()?;
let magnitudes_squared = squared.sum_keepdim(1)?;
let magnitudes = magnitudes_squared.sqrt()?;
let epsilon = Tensor::from_slice(&[1e-8f32], (1,), device)?.expand((batch_size, 1))?;
let safe_magnitudes = magnitudes.maximum(&epsilon)?;
let normalized = tensor.broadcast_div(&safe_magnitudes)?;
let result_vec: Vec<f32> = normalized
.to_vec2()
.map_err(|e| {
Error::LegacyProcessing(format!("Failed to convert normalized vectors: {e}"))
})?
.into_iter()
.flatten()
.collect();
let result = Array2::from_shape_vec((batch_size, vector_len), result_vec)
.map_err(|e| Error::LegacyProcessing(format!("Failed to reshape result: {e}")))?;
Ok(result)
}
}
/// Ambisonic encoder of a given order backed by a [`GpuDevice`].
pub struct GpuAmbisonics {
    // Device on which matrices are created.
    device: Arc<GpuDevice>,
    // Ambisonic order; channel count is (order + 1)^2.
    order: u32,
    // Cached (num_sources, num_channels) encoding matrix; built by
    // `precompute_encoding_matrices`.
    encoding_matrices: Option<Tensor>,
    // NOTE(review): never populated or read in this file — decoding appears
    // unimplemented. Confirm before removing.
    decoding_matrices: Option<Tensor>,
}
impl GpuAmbisonics {
    /// Creates an encoder of the given ambisonic order bound to `device`.
    /// Matrices are built lazily via `precompute_encoding_matrices`.
    pub fn new(device: Arc<GpuDevice>, order: u32) -> Result<Self> {
        Ok(Self {
            device,
            order,
            encoding_matrices: None,
            decoding_matrices: None,
        })
    }

    /// Builds the (num_sources, num_channels) encoding matrix from the source
    /// positions and caches it for `encode_batch`.
    ///
    /// Channel count is (order + 1)^2; coefficients are emitted per source in
    /// (l, m) order with m running from -l to l.
    ///
    /// # Errors
    /// Returns an error if the tensor upload fails.
    pub fn precompute_encoding_matrices(&mut self, source_positions: &[Position3D]) -> Result<()> {
        let device = self.device.device();
        let num_sources = source_positions.len();
        let num_channels = ((self.order + 1) * (self.order + 1)) as usize;
        let mut encoding_data = Vec::with_capacity(num_sources * num_channels);
        for position in source_positions {
            // Convert the Cartesian position to spherical angles.
            let distance =
                (position.x * position.x + position.y * position.y + position.z * position.z)
                    .sqrt();
            let azimuth = position.y.atan2(position.x);
            // max(1e-8) guards the division for a source at the origin.
            let elevation = (position.z / distance.max(1e-8)).asin();
            // One coefficient per channel.
            for l in 0..=self.order {
                for m in -(l as i32)..=(l as i32) {
                    let coeff = self.spherical_harmonic(l, m, azimuth, elevation);
                    encoding_data.push(coeff);
                }
            }
        }
        self.encoding_matrices = Some(
            Tensor::from_slice(&encoding_data, (num_sources, num_channels), device).map_err(
                |e| Error::LegacyProcessing(format!("Failed to create encoding matrices: {e}")),
            )?,
        );
        Ok(())
    }

    /// Real spherical-harmonic coefficient for degree `l`, order `m` at the
    /// given azimuth/elevation. Only l <= 1 has explicit formulas.
    ///
    /// NOTE(review): the first-order terms use sin(elevation) for the m = ±1
    /// channels and cos(elevation) for m = 0, the opposite of the common
    /// first-order ambisonic convention (X/Y ∝ cos(el), Z ∝ sin(el)) —
    /// confirm whether sin/cos are intentionally swapped. The in-file test
    /// `test_spherical_harmonic_calculation` asserts the current behavior.
    fn spherical_harmonic(&self, l: u32, m: i32, azimuth: f32, elevation: f32) -> f32 {
        let cos_el = elevation.cos();
        let sin_el = elevation.sin();
        match (l, m) {
            (0, 0) => 1.0,
            (1, -1) => sin_el * azimuth.sin(),
            (1, 0) => cos_el,
            (1, 1) => sin_el * azimuth.cos(),
            // NOTE(review): placeholder constant for all higher-order terms —
            // not a real spherical harmonic. Do not rely on order > 1.
            _ => 0.5,
        }
    }

    /// Encodes per-source audio (num_sources, num_samples) into ambisonic
    /// channels (num_channels, num_samples) by multiplying with the transposed
    /// precomputed encoding matrix.
    ///
    /// # Errors
    /// Fails if `precompute_encoding_matrices` has not been called, the input
    /// is non-contiguous, or a tensor operation fails.
    pub fn encode_batch(&self, audio_samples: &Array2<f32>) -> Result<Array2<f32>> {
        let encoding_matrices = self
            .encoding_matrices
            .as_ref()
            .ok_or_else(|| Error::LegacyProcessing("Encoding matrices not computed".to_string()))?;
        let device = self.device.device();
        let num_sources = audio_samples.shape()[0];
        let num_samples = audio_samples.shape()[1];
        let num_channels = ((self.order + 1) * (self.order + 1)) as usize;
        let audio_slice = audio_samples.as_slice().ok_or_else(|| {
            Error::LegacyProcessing(
                "Audio samples array is not contiguous in memory, cannot create tensor efficiently"
                    .to_string(),
            )
        })?;
        let audio_tensor = Tensor::from_slice(audio_slice, (num_sources, num_samples), device)
            .map_err(|e| Error::LegacyProcessing(format!("Failed to create audio tensor: {e}")))?;
        // (num_channels, num_sources) x (num_sources, num_samples)
        //   -> (num_channels, num_samples)
        let encoding_transposed = encoding_matrices.transpose(0, 1)?;
        let encoded = encoding_transposed.matmul(&audio_tensor)?;
        let result_vec: Vec<f32> = encoded
            .to_vec2()
            .map_err(|e| Error::LegacyProcessing(format!("Failed to convert encoded audio: {e}")))?
            .into_iter()
            .flatten()
            .collect();
        let result =
            Array2::from_shape_vec((num_channels, num_samples), result_vec).map_err(|e| {
                Error::LegacyProcessing(format!("Failed to reshape encoded audio: {e}"))
            })?;
        Ok(result)
    }
}
/// Manages a pool of [`GpuDevice`]s and hands them out round-robin.
pub struct GpuResourceManager {
    // The device pool; never empty (a CPU fallback is added if no configs).
    devices: Vec<Arc<GpuDevice>>,
    // Index of the next device to hand out.
    current_device: usize,
    // Per-device memory usage counters.
    // NOTE(review): initialized to 0 and never updated in this file.
    memory_usage: Vec<usize>,
}
impl GpuResourceManager {
    /// Builds one device per config. When `configs` is empty a single
    /// CPU-only device is created so the pool is never empty.
    ///
    /// # Errors
    /// Propagates the first device-creation failure.
    pub fn new(configs: Vec<GpuConfig>) -> Result<Self> {
        let mut devices = configs
            .into_iter()
            .map(|config| GpuDevice::new(config).map(Arc::new))
            .collect::<Result<Vec<_>>>()?;
        if devices.is_empty() {
            let cpu_only = GpuConfig {
                prefer_gpu: false,
                ..Default::default()
            };
            devices.push(Arc::new(GpuDevice::new(cpu_only)?));
        }
        // One usage counter per device, all starting at zero.
        let memory_usage = vec![0; devices.len()];
        Ok(Self {
            devices,
            current_device: 0,
            memory_usage,
        })
    }

    /// Hands out the next device in round-robin order.
    pub fn get_optimal_device(&mut self) -> Arc<GpuDevice> {
        let chosen = Arc::clone(&self.devices[self.current_device]);
        self.current_device = (self.current_device + 1) % self.devices.len();
        chosen
    }

    /// All managed devices, in creation order.
    pub fn get_all_devices(&self) -> &[Arc<GpuDevice>] {
        &self.devices
    }

    /// Number of devices in the pool (always >= 1).
    pub fn device_count(&self) -> usize {
        self.devices.len()
    }

    /// Recorded memory usage for `device_id`, or `None` when out of range.
    pub fn get_memory_usage(&self, device_id: usize) -> Option<usize> {
        self.memory_usage.get(device_id).copied()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a CPU-only device so the tests run deterministically
    /// without CUDA hardware.
    fn cpu_device() -> Arc<GpuDevice> {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        Arc::new(GpuDevice::new(config).expect("Should successfully create GPU device"))
    }

    #[test]
    fn test_gpu_config() {
        let config = GpuConfig::default();
        assert!(config.prefer_gpu);
        assert_eq!(config.batch_size, 32);
        assert!(config.mixed_precision);
    }

    #[test]
    fn test_gpu_device_creation() {
        let config = GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        };
        let device = GpuDevice::new(config).expect("Should successfully create GPU device");
        assert!(!device.is_gpu());
    }

    #[test]
    fn test_gpu_spatial_math() {
        let math = GpuSpatialMath::new(cpu_device());
        let listener = Position3D::new(0.0, 0.0, 0.0);
        let sources = vec![
            Position3D::new(1.0, 0.0, 0.0),
            Position3D::new(0.0, 1.0, 0.0),
            Position3D::new(0.0, 0.0, 1.0),
        ];
        let distances = math
            .calculate_distances(&listener, &sources)
            .expect("Should successfully calculate distances");
        assert_eq!(distances.len(), 3);
        // Every source sits on a unit axis, so each distance is exactly 1.
        for distance in distances.iter() {
            assert!((distance - 1.0).abs() < 1e-6);
        }
    }

    #[test]
    fn test_batch_dot_product() {
        let math = GpuSpatialMath::new(cpu_device());
        let unit_rows = vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0];
        let vectors_a = Array2::from_shape_vec((2, 3), unit_rows.clone())
            .expect("Should successfully create Array2 from shape vec");
        let vectors_b = Array2::from_shape_vec((2, 3), unit_rows)
            .expect("Should successfully create Array2 from shape vec");
        let dot_products = math
            .batch_dot_product(&vectors_a, &vectors_b)
            .expect("Should successfully calculate batch dot product");
        assert_eq!(dot_products.len(), 2);
        // Each unit row dotted with itself yields 1.
        for &dot_product in dot_products.iter() {
            assert!((dot_product - 1.0).abs() < 1e-6);
        }
    }

    #[test]
    fn test_normalize_batch() {
        let math = GpuSpatialMath::new(cpu_device());
        let vectors = Array2::from_shape_vec((2, 3), vec![2.0, 0.0, 0.0, 0.0, 3.0, 0.0])
            .expect("Should successfully create Array2 from shape vec");
        let normalized = math
            .normalize_batch(&vectors)
            .expect("Should successfully normalize batch");
        assert_eq!(normalized.shape(), [2, 3]);
        // After normalization every row must have unit magnitude.
        for row in 0..2 {
            let magnitude = (0..3)
                .map(|col| normalized[[row, col]].powi(2))
                .sum::<f32>()
                .sqrt();
            assert!((magnitude - 1.0).abs() < 1e-6);
        }
    }

    #[test]
    fn test_gpu_convolution_creation() {
        let convolution = GpuConvolution::new(cpu_device(), 1024, 256)
            .expect("Should successfully create GPU convolution");
        assert_eq!(convolution.fft_size, 1024);
        assert_eq!(convolution.hop_size, 256);
    }

    #[test]
    fn test_gpu_resource_manager() {
        let configs = vec![GpuConfig {
            prefer_gpu: false,
            ..Default::default()
        }];
        let mut manager = GpuResourceManager::new(configs)
            .expect("Should successfully create GPU resource manager");
        assert_eq!(manager.device_count(), 1);
        assert!(!manager.get_optimal_device().is_gpu());
    }

    #[test]
    fn test_gpu_ambisonics_creation() {
        let ambisonics = GpuAmbisonics::new(cpu_device(), 1)
            .expect("Should successfully create GPU ambisonics");
        assert_eq!(ambisonics.order, 1);
    }

    #[test]
    fn test_spherical_harmonic_calculation() {
        let ambisonics = GpuAmbisonics::new(cpu_device(), 1)
            .expect("Should successfully create GPU ambisonics");
        // l = 0 (W channel) is a constant 1.
        assert_eq!(ambisonics.spherical_harmonic(0, 0, 0.0, 0.0), 1.0);
        // (l = 1, m = 0) at zero azimuth/elevation under the current
        // cos(elevation)-based formula.
        assert_eq!(ambisonics.spherical_harmonic(1, 0, 0.0, 0.0), 1.0);
    }
}