use crate::types::Position3D;
use crate::{Error, Result};
use scirs2_core::ndarray::{Array1, Array2, Array3, Axis};
use scirs2_core::Complex32;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::f32::consts::PI;
/// Compression strategy used for a spatial audio stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompressionCodec {
    /// Perceptual masking followed by per-channel quantization.
    PerceptualSpatial,
    /// Ambisonics-domain coding with per-order quality weighting.
    AmbisonicsOptimized,
    /// Source-cluster based coding driven by source positions.
    PositionalCompression,
    /// Frequency-split combination of the three codecs above.
    Hybrid,
    /// Bit-exact LZ-style compression of the raw samples.
    Lossless,
}
/// Quality preset; maps to quantization bit depth in the encoder
/// (8 / 12 / 16 / 20 bits respectively).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompressionQuality {
    Low,
    Medium,
    High,
    VeryHigh,
}
/// Top-level configuration for [`SpatialCompressor`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpatialCompressionConfig {
    /// Codec to run per frame.
    pub codec: CompressionCodec,
    /// Quality preset (controls quantization bit depth).
    pub quality: CompressionQuality,
    /// Target bitrate — presumably bits per second; the achieved bitrate
    /// in [`CompressionStats`] is computed in bits per second.
    pub target_bitrate: u32,
    /// Sample rate in Hz.
    pub sample_rate: f32,
    /// Number of input channels expected per frame.
    pub channel_count: usize,
    pub perceptual_params: PerceptualParams,
    pub spatial_params: SpatialParams,
    pub adaptive_params: AdaptiveParams,
}
/// Psychoacoustic model settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerceptualParams {
    /// When false, the masking stage is skipped entirely.
    pub masking_enabled: bool,
    /// Number of analysis frequency bands (0..Nyquist, linearly spaced).
    pub frequency_bands: usize,
    /// Spatial masking threshold — presumably in dB; verify against the
    /// masking model once it is implemented.
    pub spatial_masking_threshold: f32,
    pub temporal_masking: TemporalMasking,
    pub loudness_compensation: bool,
}
/// Temporal (pre/post) masking window configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalMasking {
    pub enabled: bool,
    /// Pre-masking window length in milliseconds.
    pub pre_masking_ms: f32,
    /// Post-masking window length in milliseconds.
    pub post_masking_ms: f32,
    /// Masking threshold in dB.
    pub threshold_db: f32,
}
/// Spatial encoding settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpatialParams {
    /// Spatial resolution — units not established here (degrees?);
    /// confirm against the encoder once quantization tables are built.
    pub spatial_resolution: f32,
    /// Number of distance quantization steps.
    pub distance_quantization: usize,
    /// Ambisonics order; channel count is (order + 1)^2.
    pub ambisonics_order: usize,
    pub source_clustering: SourceClustering,
}
/// Settings for grouping nearby sources into joint clusters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceClustering {
    pub enabled: bool,
    /// Maximum distance between a source and its cluster center.
    pub max_cluster_distance: f32,
    pub max_sources_per_cluster: usize,
    /// How often cluster assignments are recomputed, in milliseconds.
    pub update_interval_ms: f32,
}
/// Adaptive bitrate controller settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdaptiveParams {
    /// Enables per-frame bitrate adaptation in `compress_frame`.
    pub adaptive_bitrate: bool,
    pub min_bitrate: u32,
    pub max_bitrate: u32,
    /// Adaptation window length in seconds.
    pub adaptation_window: f32,
    /// Quality score below which the controller should react (0..1).
    pub quality_threshold: f32,
}
/// One compressed frame: payload bytes plus side information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressedFrame {
    /// Codec-specific compressed payload.
    pub audio_data: Vec<u8>,
    /// Spatial metadata copied from the input frame.
    pub spatial_metadata: SpatialMetadata,
    pub compression_stats: CompressionStats,
    /// End-of-frame timestamp in milliseconds since the first frame.
    pub timestamp_ms: f64,
}
/// Per-frame spatial side information supplied by the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpatialMetadata {
    /// World-space position of each active source (indexed like the
    /// input channels in the positional/ambisonics codecs).
    pub source_positions: Vec<Position3D>,
    pub ambisonics_coefficients: Vec<f32>,
    pub spatial_covariance: Vec<f32>,
    pub distance_factors: Vec<f32>,
    /// Listener orientation triple — presumably (yaw, pitch, roll);
    /// not consumed by the current encoder, so confirm with callers.
    pub listener_orientation: (f32, f32, f32),
}
/// Measurements collected while compressing one frame.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompressionStats {
    /// Input size in bytes (samples * size_of::<f32>()).
    pub original_size: usize,
    /// Payload size in bytes.
    pub compressed_size: usize,
    /// original_size / compressed_size.
    pub compression_ratio: f32,
    /// Achieved bitrate in bits per second.
    pub achieved_bitrate: f32,
    /// Heuristic quality-loss estimate in 0..=1.
    pub quality_loss: f32,
    pub processing_time_ms: f32,
}
/// Stateful spatial audio compressor; create with [`SpatialCompressor::new`]
/// and feed frames through `compress_frame`.
pub struct SpatialCompressor {
    config: SpatialCompressionConfig,
    perceptual_model: PerceptualModel,
    spatial_encoder: SpatialEncoder,
    adaptive_controller: AdaptiveController,
    // Scratch buffers sized at construction; currently not read by the
    // compression paths (reserved for streaming use).
    input_buffer: Array2<f32>,
    output_buffer: Vec<u8>,
    // Number of frames compressed so far; drives timestamp_ms.
    frame_count: u64,
}
/// Psychoacoustic model state (band layout plus masking state).
#[derive(Debug)]
struct PerceptualModel {
    // Band center frequencies, linearly spaced over 0..Nyquist.
    frequency_bands: Array1<f32>,
    masking_thresholds: Array1<f32>,
    // Bark-scale value per band (see compute_bark_scale).
    bark_scale: Array1<f32>,
    temporal_state: TemporalMaskingState,
}
/// Rolling state for the temporal masking stage (currently unused by the
/// placeholder implementation of `apply_temporal_masking`).
#[derive(Debug)]
struct TemporalMaskingState {
    prev_energy: Array1<f32>,
    pre_masking_buffer: Array2<f32>,
    post_masking_buffer: Array2<f32>,
}
/// Spatial encoding state: codec selection, coding tables, and the
/// current set of source clusters.
#[derive(Debug)]
struct SpatialEncoder {
    method: CompressionCodec,
    // Both tables start empty; population is not implemented yet.
    quantization_tables: HashMap<String, Array1<f32>>,
    huffman_tables: HashMap<String, Vec<(u8, Vec<bool>)>>,
    source_clusters: Vec<SourceCluster>,
}
/// A group of nearby sources coded jointly as one representative signal.
#[derive(Debug, Clone)]
struct SourceCluster {
    center: Position3D,
    // Indices into SpatialMetadata::source_positions.
    source_indices: Vec<usize>,
    representative_signal: Array1<f32>,
    // Per-source weights used to reconstruct members from the
    // representative signal; assumed in 0..=1 by quantize_weights.
    mixing_weights: Array1<f32>,
}
/// Adaptive bitrate controller state.
#[derive(Debug)]
struct AdaptiveController {
    current_bitrate: u32,
    quality_history: Vec<f32>,
    bitrate_history: Vec<u32>,
    // Adaptation window expressed in samples.
    window_samples: usize,
}
impl Default for SpatialCompressionConfig {
    /// Balanced defaults: perceptual-spatial codec, medium quality,
    /// 128 kbps target over 8 channels at 48 kHz.
    fn default() -> Self {
        let perceptual_params = PerceptualParams {
            masking_enabled: true,
            frequency_bands: 32,
            spatial_masking_threshold: -40.0,
            temporal_masking: TemporalMasking {
                enabled: true,
                pre_masking_ms: 2.0,
                post_masking_ms: 100.0,
                threshold_db: -20.0,
            },
            loudness_compensation: true,
        };
        let spatial_params = SpatialParams {
            spatial_resolution: 5.0,
            distance_quantization: 32,
            ambisonics_order: 3,
            source_clustering: SourceClustering {
                enabled: true,
                max_cluster_distance: 1.0,
                max_sources_per_cluster: 4,
                update_interval_ms: 100.0,
            },
        };
        let adaptive_params = AdaptiveParams {
            adaptive_bitrate: true,
            min_bitrate: 64000,
            max_bitrate: 320000,
            adaptation_window: 5.0,
            quality_threshold: 0.85,
        };
        Self {
            codec: CompressionCodec::PerceptualSpatial,
            quality: CompressionQuality::Medium,
            target_bitrate: 128000,
            sample_rate: 48000.0,
            channel_count: 8,
            perceptual_params,
            spatial_params,
            adaptive_params,
        }
    }
}
impl SpatialCompressor {
    /// Creates a compressor from `config`, initializing the perceptual
    /// model, spatial encoder, and adaptive bitrate controller.
    ///
    /// # Errors
    /// Propagates any initialization failure from the sub-components.
    pub fn new(config: SpatialCompressionConfig) -> Result<Self> {
        let perceptual_model = PerceptualModel::new(&config.perceptual_params, config.sample_rate)?;
        let spatial_encoder = SpatialEncoder::new(&config)?;
        let adaptive_controller = AdaptiveController::new(&config.adaptive_params)?;
        // Nominal frame size used to pre-size the scratch buffers.
        let buffer_size = 1024;
        let input_buffer = Array2::zeros((config.channel_count, buffer_size));
        let output_buffer = Vec::with_capacity(buffer_size * config.channel_count);
        Ok(Self {
            config,
            perceptual_model,
            spatial_encoder,
            adaptive_controller,
            input_buffer,
            output_buffer,
            frame_count: 0,
        })
    }

    /// Compresses one frame of multichannel audio (`channels x samples`)
    /// with the configured codec, returning the payload plus statistics.
    ///
    /// # Errors
    /// Returns `Error::LegacyProcessing` when `audio_data` does not have
    /// exactly `config.channel_count` rows, or when a codec stage fails.
    pub fn compress_frame(
        &mut self,
        audio_data: &Array2<f32>,
        spatial_metadata: &SpatialMetadata,
    ) -> Result<CompressedFrame> {
        let start_time = std::time::Instant::now();
        if audio_data.nrows() != self.config.channel_count {
            return Err(Error::LegacyProcessing(format!(
                "Expected {} channels, got {}",
                self.config.channel_count,
                audio_data.nrows()
            )));
        }
        if self.config.adaptive_params.adaptive_bitrate {
            self.adaptive_controller.update(&self.config)?;
        }
        // Perceptual masking runs first so every codec sees the same input.
        let masked_audio = self.apply_perceptual_masking(audio_data)?;
        let compressed_audio = match self.config.codec {
            CompressionCodec::PerceptualSpatial => {
                self.compress_perceptual_spatial(&masked_audio, spatial_metadata)?
            }
            CompressionCodec::AmbisonicsOptimized => {
                self.compress_ambisonics_optimized(&masked_audio, spatial_metadata)?
            }
            CompressionCodec::PositionalCompression => {
                self.compress_positional(&masked_audio, spatial_metadata)?
            }
            CompressionCodec::Hybrid => self.compress_hybrid(&masked_audio, spatial_metadata)?,
            CompressionCodec::Lossless => {
                self.compress_lossless(&masked_audio, spatial_metadata)?
            }
        };
        let processing_time = start_time.elapsed().as_secs_f32() * 1000.0;
        let original_size = audio_data.len() * std::mem::size_of::<f32>();
        let compressed_size = compressed_audio.len();
        // .max(1) guards keep the ratios finite on degenerate inputs.
        let compression_ratio = original_size as f32 / compressed_size.max(1) as f32;
        let achieved_bitrate = (compressed_size as f32 * 8.0 * self.config.sample_rate)
            / audio_data.ncols().max(1) as f32;
        let compression_stats = CompressionStats {
            original_size,
            compressed_size,
            compression_ratio,
            achieved_bitrate,
            quality_loss: self.estimate_quality_loss(&masked_audio, &compressed_audio)?,
            processing_time_ms: processing_time,
        };
        self.frame_count += 1;
        Ok(CompressedFrame {
            audio_data: compressed_audio,
            spatial_metadata: spatial_metadata.clone(),
            compression_stats,
            // Timestamp of the end of this frame, in milliseconds.
            timestamp_ms: self.frame_count as f64 * 1000.0 * audio_data.ncols() as f64
                / self.config.sample_rate as f64,
        })
    }

    /// Applies simultaneous and (optionally) temporal perceptual masking.
    /// Returns the input unchanged when masking is disabled.
    fn apply_perceptual_masking(&mut self, audio_data: &Array2<f32>) -> Result<Array2<f32>> {
        if !self.config.perceptual_params.masking_enabled {
            return Ok(audio_data.clone());
        }
        let mut masked_audio = audio_data.clone();
        for channel_idx in 0..audio_data.nrows() {
            let channel_data = audio_data.row(channel_idx).to_owned();
            let masked_channel = self.perceptual_model.apply_masking(&channel_data)?;
            masked_audio.row_mut(channel_idx).assign(&masked_channel);
        }
        if self.config.perceptual_params.temporal_masking.enabled {
            self.perceptual_model
                .apply_temporal_masking(&mut masked_audio)?;
        }
        Ok(masked_audio)
    }

    /// Quantizes each channel at the configured quality, then entropy-codes
    /// the concatenated result.
    fn compress_perceptual_spatial(
        &mut self,
        audio_data: &Array2<f32>,
        _spatial_metadata: &SpatialMetadata,
    ) -> Result<Vec<u8>> {
        let mut compressed = Vec::new();
        for channel in audio_data.rows() {
            let channel_owned = channel.to_owned();
            let quantized = self.quantize_channel(&channel_owned, self.config.quality)?;
            compressed.extend_from_slice(&quantized);
        }
        self.apply_entropy_coding(&compressed)
    }

    /// Converts sources into the ambisonics domain, then quantizes each
    /// ambisonics channel with a quality reduced for higher orders (which
    /// contribute less to perceived spatial accuracy).
    fn compress_ambisonics_optimized(
        &mut self,
        audio_data: &Array2<f32>,
        spatial_metadata: &SpatialMetadata,
    ) -> Result<Vec<u8>> {
        let ambisonics_data = self.convert_to_ambisonics(audio_data, spatial_metadata)?;
        let mut compressed = Vec::new();
        for (idx, channel) in ambisonics_data.rows().into_iter().enumerate() {
            let channel_order = self.get_ambisonics_channel_order(idx);
            // W (order 0) keeps full quality; higher orders are de-weighted.
            let quality_factor = if channel_order == 0 {
                1.0
            } else {
                0.7 / channel_order as f32
            };
            let adjusted_quality = match self.config.quality {
                CompressionQuality::Low => CompressionQuality::Low,
                CompressionQuality::Medium => {
                    if quality_factor > 0.5 {
                        CompressionQuality::Medium
                    } else {
                        CompressionQuality::Low
                    }
                }
                CompressionQuality::High => {
                    if quality_factor > 0.7 {
                        CompressionQuality::High
                    } else {
                        CompressionQuality::Medium
                    }
                }
                CompressionQuality::VeryHigh => CompressionQuality::High,
            };
            let channel_owned = channel.to_owned();
            let quantized = self.quantize_channel(&channel_owned, adjusted_quality)?;
            compressed.extend_from_slice(&quantized);
        }
        self.apply_entropy_coding(&compressed)
    }

    /// Codes each source cluster's representative signal plus its mixing
    /// weights, followed by the cluster layout metadata.
    /// The raw audio is unused: cluster representative signals are assumed
    /// to be maintained by `update_clusters`.
    fn compress_positional(
        &mut self,
        _audio_data: &Array2<f32>,
        spatial_metadata: &SpatialMetadata,
    ) -> Result<Vec<u8>> {
        self.spatial_encoder
            .update_clusters(&spatial_metadata.source_positions)?;
        let mut compressed = Vec::new();
        for cluster in &self.spatial_encoder.source_clusters {
            let quantized =
                self.quantize_channel(&cluster.representative_signal, self.config.quality)?;
            compressed.extend_from_slice(&quantized);
            let weight_bytes = self.quantize_weights(&cluster.mixing_weights)?;
            compressed.extend_from_slice(&weight_bytes);
        }
        let cluster_metadata = self.compress_cluster_metadata()?;
        compressed.extend_from_slice(&cluster_metadata);
        self.apply_entropy_coding(&compressed)
    }

    /// Splits the signal into three frequency bands and applies a different
    /// codec to each (perceptual for lows, ambisonics for mids, positional
    /// for highs).
    ///
    /// NOTE(review): `filter_frequency_range` is currently a passthrough, so
    /// all three bands see the full-band signal and the payload is roughly
    /// triplicated — revisit once real band filtering lands.
    fn compress_hybrid(
        &mut self,
        audio_data: &Array2<f32>,
        spatial_metadata: &SpatialMetadata,
    ) -> Result<Vec<u8>> {
        let mut compressed = Vec::new();
        let low_freq_data = self.filter_frequency_range(audio_data, 0.0, 1000.0)?;
        let low_compressed = self.compress_perceptual_spatial(&low_freq_data, spatial_metadata)?;
        compressed.extend_from_slice(&low_compressed);
        let mid_freq_data = self.filter_frequency_range(audio_data, 1000.0, 8000.0)?;
        let mid_compressed =
            self.compress_ambisonics_optimized(&mid_freq_data, spatial_metadata)?;
        compressed.extend_from_slice(&mid_compressed);
        let high_freq_data = self.filter_frequency_range(audio_data, 8000.0, 20000.0)?;
        let high_compressed = self.compress_positional(&high_freq_data, spatial_metadata)?;
        compressed.extend_from_slice(&high_compressed);
        Ok(compressed)
    }

    /// Serializes the samples as little-endian f32 bytes and applies
    /// bit-exact LZ-style compression.
    fn compress_lossless(
        &mut self,
        audio_data: &Array2<f32>,
        _spatial_metadata: &SpatialMetadata,
    ) -> Result<Vec<u8>> {
        let mut data_bytes = Vec::with_capacity(audio_data.len() * std::mem::size_of::<f32>());
        for &sample in audio_data.iter() {
            data_bytes.extend_from_slice(&sample.to_le_bytes());
        }
        self.apply_lossless_compression(&data_bytes)
    }

    /// Uniformly quantizes one channel (input nominally in -1.0..=1.0) to
    /// the bit depth implied by `quality`, packing samples little-endian.
    ///
    /// # Errors
    /// Returns `Error::LegacyProcessing` for an unsupported bit depth
    /// (unreachable with the current quality mapping).
    fn quantize_channel(
        &self,
        channel_data: &Array1<f32>,
        quality: CompressionQuality,
    ) -> Result<Vec<u8>> {
        let bit_depth = match quality {
            CompressionQuality::Low => 8,
            CompressionQuality::Medium => 12,
            CompressionQuality::High => 16,
            CompressionQuality::VeryHigh => 20,
        };
        let max_value = (1 << (bit_depth - 1)) - 1;
        // Worst case 3 bytes per sample (20-bit path).
        let mut quantized = Vec::with_capacity(channel_data.len() * 3);
        for &sample in channel_data.iter() {
            // Round to nearest rather than truncate toward zero to halve
            // the worst-case quantization error.
            let quantized_sample = (sample * max_value as f32).round() as i32;
            let clamped_sample = quantized_sample.clamp(-max_value, max_value);
            match bit_depth {
                8 => quantized.push(clamped_sample as u8),
                12 => {
                    // 12-bit two's complement split over low byte + nibble.
                    quantized.push((clamped_sample & 0xFF) as u8);
                    quantized.push(((clamped_sample >> 8) & 0x0F) as u8);
                }
                16 => quantized.extend_from_slice(&(clamped_sample as i16).to_le_bytes()),
                20 => {
                    // 20-bit two's complement in the low 3 bytes.
                    quantized.extend_from_slice(&(clamped_sample & 0xFFFFFF).to_le_bytes()[..3]);
                }
                _ => return Err(Error::LegacyProcessing("Unsupported bit depth".to_string())),
            }
        }
        Ok(quantized)
    }

    /// Run-length entropy coding. Runs longer than 3 are emitted as the
    /// escape triple `(0xFF, byte, run_length)`.
    ///
    /// Fix: a literal 0xFF byte used to be emitted bare, which made the
    /// stream indistinguishable from the escape marker on decode; 0xFF is
    /// now always emitted in escaped form regardless of run length.
    fn apply_entropy_coding(&self, data: &[u8]) -> Result<Vec<u8>> {
        let mut compressed = Vec::with_capacity(data.len());
        let mut i = 0;
        while i < data.len() {
            let current_byte = data[i];
            let mut run_length = 1;
            while i + run_length < data.len()
                && data[i + run_length] == current_byte
                && run_length < 255
            {
                run_length += 1;
            }
            if run_length > 3 || current_byte == 0xFF {
                compressed.push(0xFF);
                compressed.push(current_byte);
                compressed.push(run_length as u8);
            } else {
                for _ in 0..run_length {
                    compressed.push(current_byte);
                }
            }
            i += run_length;
        }
        Ok(compressed)
    }

    /// Mixes each positioned source into first-order ambisonics (W, X, Y, Z).
    ///
    /// NOTE(review): only the first four channels are populated even when
    /// `ambisonics_order` > 1; the higher-order rows stay zero — confirm
    /// whether higher orders are intended here.
    fn convert_to_ambisonics(
        &self,
        audio_data: &Array2<f32>,
        spatial_metadata: &SpatialMetadata,
    ) -> Result<Array2<f32>> {
        let order = self.config.spatial_params.ambisonics_order;
        let ambisonics_channels = (order + 1) * (order + 1);
        let mut ambisonics_data = Array2::zeros((ambisonics_channels, audio_data.ncols()));
        for (source_idx, &position) in spatial_metadata.source_positions.iter().enumerate() {
            // Sources without a matching audio channel are ignored.
            if source_idx >= audio_data.nrows() {
                break;
            }
            let azimuth = position.y.atan2(position.x);
            let elevation = position
                .z
                .atan2((position.x * position.x + position.y * position.y).sqrt());
            // W: omnidirectional component.
            ambisonics_data
                .row_mut(0)
                .scaled_add(1.0, &audio_data.row(source_idx));
            if ambisonics_channels > 1 {
                // X
                ambisonics_data
                    .row_mut(1)
                    .scaled_add(azimuth.cos() * elevation.cos(), &audio_data.row(source_idx));
            }
            if ambisonics_channels > 2 {
                // Y
                ambisonics_data
                    .row_mut(2)
                    .scaled_add(azimuth.sin() * elevation.cos(), &audio_data.row(source_idx));
            }
            if ambisonics_channels > 3 {
                // Z
                ambisonics_data
                    .row_mut(3)
                    .scaled_add(elevation.sin(), &audio_data.row(source_idx));
            }
        }
        Ok(ambisonics_data)
    }

    /// Maps an ambisonics channel index (ACN ordering) to its order.
    /// Indices beyond third order are reported as order 3.
    fn get_ambisonics_channel_order(&self, channel_idx: usize) -> usize {
        if channel_idx == 0 {
            0
        } else if channel_idx <= 3 {
            1
        } else if channel_idx <= 8 {
            2
        } else {
            3
        }
    }

    /// Band-pass placeholder: currently returns the full-band signal
    /// unchanged. TODO: implement the actual filter.
    fn filter_frequency_range(
        &self,
        audio_data: &Array2<f32>,
        _low_freq: f32,
        _high_freq: f32,
    ) -> Result<Array2<f32>> {
        Ok(audio_data.clone())
    }

    /// Quantizes mixing weights (assumed 0.0..=1.0) to one byte each.
    /// Fix: out-of-range weights are clamped and values are rounded to
    /// nearest instead of truncated.
    fn quantize_weights(&self, weights: &Array1<f32>) -> Result<Vec<u8>> {
        let mut quantized = Vec::with_capacity(weights.len());
        for &weight in weights.iter() {
            quantized.push((weight.clamp(0.0, 1.0) * 255.0).round() as u8);
        }
        Ok(quantized)
    }

    /// Serializes the cluster layout: a count byte followed by one record
    /// (x, y, z, member count) per cluster. Positions are quantized on the
    /// assumed range -10.0..=10.0 meters — TODO confirm with world bounds.
    /// Fix: the count byte previously truncated via `as u8`; the cluster
    /// list is now capped at 255 so the count always matches the records.
    fn compress_cluster_metadata(&self) -> Result<Vec<u8>> {
        let cluster_count = self.spatial_encoder.source_clusters.len().min(255);
        let mut metadata = Vec::with_capacity(1 + cluster_count * 4);
        metadata.push(cluster_count as u8);
        for cluster in self.spatial_encoder.source_clusters.iter().take(cluster_count) {
            // f32 -> u8 `as` casts saturate, so out-of-range centers clamp.
            let x_quantized = ((cluster.center.x + 10.0) * 25.5) as u8;
            let y_quantized = ((cluster.center.y + 10.0) * 25.5) as u8;
            let z_quantized = ((cluster.center.z + 10.0) * 25.5) as u8;
            metadata.extend_from_slice(&[x_quantized, y_quantized, z_quantized]);
            metadata.push(cluster.source_indices.len().min(255) as u8);
        }
        Ok(metadata)
    }

    /// LZ77-style compression over a 4096-byte window. Matches are emitted
    /// as `(0xFF, 0xFE, distance_le_u16, length)`; literals pass through.
    ///
    /// Fix: a literal 0xFF used to be emitted bare and could collide with
    /// the match marker on decode; it is now escaped as `0xFF 0xFF`.
    /// NOTE(review): the naive window scan is O(n * 4096) — acceptable for
    /// frame-sized inputs, but revisit for long buffers.
    fn apply_lossless_compression(&self, data: &[u8]) -> Result<Vec<u8>> {
        let mut compressed = Vec::with_capacity(data.len());
        let mut i = 0;
        while i < data.len() {
            let mut best_length = 0;
            let mut best_distance = 0;
            let search_start = i.saturating_sub(4096);
            for j in search_start..i {
                let mut length = 0;
                while i + length < data.len()
                    && j + length < i
                    && data[i + length] == data[j + length]
                    && length < 255
                {
                    length += 1;
                }
                // Matches shorter than 3 bytes cost more than literals.
                if length > best_length && length >= 3 {
                    best_length = length;
                    best_distance = i - j;
                }
            }
            if best_length > 0 {
                compressed.push(0xFF);
                compressed.push(0xFE);
                compressed.extend_from_slice(&(best_distance as u16).to_le_bytes());
                compressed.push(best_length as u8);
                i += best_length;
            } else {
                if data[i] == 0xFF {
                    // Escape the marker byte: 0xFF 0xFF decodes to one 0xFF.
                    compressed.push(0xFF);
                    compressed.push(0xFF);
                } else {
                    compressed.push(data[i]);
                }
                i += 1;
            }
        }
        Ok(compressed)
    }

    /// Heuristic quality-loss estimate in 0..=1 derived purely from the
    /// achieved compression ratio (no signal comparison yet).
    fn estimate_quality_loss(&self, original: &Array2<f32>, compressed: &[u8]) -> Result<f32> {
        let original_bytes = original.len() * std::mem::size_of::<f32>();
        let compression_ratio = original_bytes as f32 / compressed.len().max(1) as f32;
        let quality_loss = (compression_ratio - 1.0) / 10.0;
        Ok(quality_loss.clamp(0.0, 1.0))
    }

    /// Read-only access to the active configuration.
    pub fn config(&self) -> &SpatialCompressionConfig {
        &self.config
    }

    /// Aggregate statistics; not implemented yet, always `None`.
    pub fn get_stats(&self) -> Option<CompressionStats> {
        None
    }
}
impl PerceptualModel {
    /// Builds model state for `params.frequency_bands` analysis bands
    /// linearly spaced over 0..Nyquist.
    fn new(params: &PerceptualParams, sample_rate: f32) -> Result<Self> {
        let bands = params.frequency_bands;
        let frequency_bands = Array1::linspace(0.0, sample_rate / 2.0, bands);
        let bark_scale = Self::compute_bark_scale(&frequency_bands);
        Ok(Self {
            masking_thresholds: Array1::zeros(bands),
            temporal_state: TemporalMaskingState {
                prev_energy: Array1::zeros(bands),
                pre_masking_buffer: Array2::zeros((bands, 10)),
                post_masking_buffer: Array2::zeros((bands, 100)),
            },
            frequency_bands,
            bark_scale,
        })
    }

    /// Maps linear frequencies (Hz) to the Bark critical-band scale.
    fn compute_bark_scale(frequencies: &Array1<f32>) -> Array1<f32> {
        frequencies.mapv(|hz| {
            let low_term = (0.00076 * hz).atan();
            let high_term = ((hz / 7500.0).powi(2)).atan();
            13.0 * low_term + 3.5 * high_term
        })
    }

    /// Simultaneous masking stage; placeholder that attenuates uniformly.
    fn apply_masking(&mut self, channel_data: &Array1<f32>) -> Result<Array1<f32>> {
        Ok(channel_data.mapv(|sample| sample * 0.9))
    }

    /// Temporal (pre/post) masking stage; placeholder no-op.
    fn apply_temporal_masking(&mut self, _audio_data: &mut Array2<f32>) -> Result<()> {
        Ok(())
    }
}
impl SpatialEncoder {
    /// Creates an encoder bound to the configured codec, with empty coding
    /// tables and no source clusters.
    fn new(config: &SpatialCompressionConfig) -> Result<Self> {
        Ok(Self {
            method: config.codec,
            quantization_tables: HashMap::new(),
            huffman_tables: HashMap::new(),
            source_clusters: Vec::new(),
        })
    }

    /// Recomputes source clusters from the current positions; placeholder
    /// no-op, so `source_clusters` stays empty.
    fn update_clusters(&mut self, _positions: &[Position3D]) -> Result<()> {
        Ok(())
    }
}
impl AdaptiveController {
    /// Starts the controller at the minimum allowed bitrate with empty
    /// history buffers.
    fn new(params: &AdaptiveParams) -> Result<Self> {
        // NOTE(review): the window length assumes a 48 kHz sample rate
        // rather than the configured one — confirm whether this should be
        // derived from SpatialCompressionConfig::sample_rate.
        let window_samples = (params.adaptation_window * 48000.0) as usize;
        Ok(Self {
            current_bitrate: params.min_bitrate,
            quality_history: Vec::new(),
            bitrate_history: Vec::new(),
            window_samples,
        })
    }

    /// Adapts the target bitrate from recent quality; placeholder no-op.
    fn update(&mut self, _config: &SpatialCompressionConfig) -> Result<()> {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_compression_config_default() {
        let config = SpatialCompressionConfig::default();
        assert_eq!(config.codec, CompressionCodec::PerceptualSpatial);
        assert_eq!(config.quality, CompressionQuality::Medium);
        assert_eq!(config.target_bitrate, 128000);
    }

    #[test]
    fn test_compressor_creation() {
        let config = SpatialCompressionConfig::default();
        let compressor = SpatialCompressor::new(config);
        assert!(compressor.is_ok());
    }

    #[test]
    fn test_frame_compression() {
        let config = SpatialCompressionConfig::default();
        let mut compressor = SpatialCompressor::new(config).unwrap();
        // 8 channels x 1024 samples of a constant full-scale signal.
        let audio_data = Array2::ones((8, 1024));
        let spatial_metadata = SpatialMetadata {
            source_positions: vec![Position3D {
                x: 1.0,
                y: 0.0,
                z: 0.0,
            }],
            ambisonics_coefficients: vec![],
            spatial_covariance: vec![],
            distance_factors: vec![1.0],
            listener_orientation: (0.0, 0.0, 0.0),
        };
        let result = compressor.compress_frame(&audio_data, &spatial_metadata);
        assert!(result.is_ok());
        let compressed_frame = result.unwrap();
        assert!(!compressed_frame.audio_data.is_empty());
        assert!(compressed_frame.compression_stats.compression_ratio > 1.0);
    }

    #[test]
    fn test_quality_levels() {
        let qualities = [
            CompressionQuality::Low,
            CompressionQuality::Medium,
            CompressionQuality::High,
            CompressionQuality::VeryHigh,
        ];
        for quality in &qualities {
            // Struct-update syntax avoids clippy::field_reassign_with_default.
            let config = SpatialCompressionConfig {
                quality: *quality,
                ..Default::default()
            };
            let compressor = SpatialCompressor::new(config);
            assert!(compressor.is_ok());
        }
    }

    #[test]
    fn test_compression_codecs() {
        let codecs = [
            CompressionCodec::PerceptualSpatial,
            CompressionCodec::AmbisonicsOptimized,
            CompressionCodec::PositionalCompression,
            CompressionCodec::Hybrid,
            CompressionCodec::Lossless,
        ];
        for codec in &codecs {
            let config = SpatialCompressionConfig {
                codec: *codec,
                ..Default::default()
            };
            let compressor = SpatialCompressor::new(config);
            assert!(compressor.is_ok());
        }
    }

    #[test]
    fn test_perceptual_model() {
        let params = PerceptualParams {
            masking_enabled: true,
            frequency_bands: 32,
            spatial_masking_threshold: -40.0,
            temporal_masking: TemporalMasking {
                enabled: true,
                pre_masking_ms: 2.0,
                post_masking_ms: 100.0,
                threshold_db: -20.0,
            },
            loudness_compensation: true,
        };
        // Fixed mojibake: the original source had `¶ms` where `&params`
        // belongs.
        let model = PerceptualModel::new(&params, 48000.0);
        assert!(model.is_ok());
    }
}