// voirs_conversion/processing.rs
//! Audio processing utilities for voice conversion

use crate::{core::AudioFeatures, Error, Result};
use scirs2_core::Complex;
use scirs2_fft::RealFftPlanner;
use tracing::{debug, trace};

/// Audio buffer for processing.
///
/// Operates in one of two modes:
/// - linear (default): `samples` grows as audio is pushed, up to `capacity`;
/// - ring (`ring_buffer == true`): `samples` is a fixed-size backing store and
///   `write_pos` wraps around, overwriting the oldest data.
#[derive(Debug, Clone)]
pub struct AudioBuffer {
    /// Audio samples
    pub samples: Vec<f32>,
    /// Sample rate in Hz
    pub sample_rate: u32,
    /// Maximum number of samples the buffer holds
    pub capacity: usize,
    /// Current write position (only meaningful in ring-buffer mode)
    write_pos: usize,
    /// Ring buffer mode flag
    ring_buffer: bool,
}

23impl AudioBuffer {
24    /// Create new buffer
25    pub fn new(capacity: usize, sample_rate: u32) -> Self {
26        Self {
27            samples: vec![0.0; capacity],
28            sample_rate,
29            capacity,
30            write_pos: 0,
31            ring_buffer: false,
32        }
33    }
34
35    /// Create ring buffer
36    pub fn new_ring_buffer(capacity: usize, sample_rate: u32) -> Self {
37        let mut buffer = Self::new(capacity, sample_rate);
38        buffer.ring_buffer = true;
39        buffer
40    }
41
42    /// Add samples to buffer
43    pub fn push_samples(&mut self, samples: &[f32]) -> Result<()> {
44        if self.ring_buffer {
45            for &sample in samples {
46                self.samples[self.write_pos] = sample;
47                self.write_pos = (self.write_pos + 1) % self.capacity;
48            }
49        } else {
50            if self.samples.len() + samples.len() > self.capacity {
51                return Err(Error::buffer("Buffer overflow".to_string()));
52            }
53            self.samples.extend_from_slice(samples);
54        }
55        Ok(())
56    }
57
58    /// Get samples and clear buffer
59    pub fn drain(&mut self) -> Vec<f32> {
60        if self.ring_buffer {
61            let mut result = Vec::with_capacity(self.capacity);
62            for i in 0..self.capacity {
63                let idx = (self.write_pos + i) % self.capacity;
64                result.push(self.samples[idx]);
65                self.samples[idx] = 0.0;
66            }
67            result
68        } else {
69            std::mem::take(&mut self.samples)
70        }
71    }
72
73    /// Get current buffer level
74    pub fn level(&self) -> f32 {
75        if self.ring_buffer {
76            1.0 // Ring buffer is always "full"
77        } else {
78            self.samples.len() as f32 / self.capacity as f32
79        }
80    }
81
82    /// Clear buffer
83    pub fn clear(&mut self) {
84        if self.ring_buffer {
85            self.samples.fill(0.0);
86            self.write_pos = 0;
87        } else {
88            self.samples.clear();
89        }
90    }
91}
92
/// Processing pipeline for audio.
///
/// Holds an ordered list of [`ProcessingStage`]s; audio flows through
/// them in insertion order (see `process`).
#[derive(Debug, Clone)]
pub struct ProcessingPipeline {
    /// Pipeline stages, executed in order
    pub stages: Vec<ProcessingStage>,
    /// Pipeline configuration
    pub config: PipelineConfig,
}

/// Configuration for the processing pipeline.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
    /// Enable parallel processing of independent stages
    pub parallel: bool,
    /// Maximum concurrent stages
    /// NOTE(review): not consulted by the visible code — confirm intended use.
    pub max_concurrent: usize,
    /// Enable stage caching
    /// NOTE(review): not consulted by the visible code — confirm intended use.
    pub enable_caching: bool,
}

113impl Default for PipelineConfig {
114    fn default() -> Self {
115        Self {
116            parallel: true,
117            max_concurrent: 4,
118            enable_caching: true,
119        }
120    }
121}
122
123impl ProcessingPipeline {
124    /// Create new pipeline
125    pub fn new() -> Self {
126        Self {
127            stages: Vec::new(),
128            config: PipelineConfig::default(),
129        }
130    }
131
132    /// Create pipeline with configuration
133    pub fn with_config(config: PipelineConfig) -> Self {
134        Self {
135            stages: Vec::new(),
136            config,
137        }
138    }
139
140    /// Add processing stage
141    pub fn add_stage(&mut self, stage: ProcessingStage) {
142        self.stages.push(stage);
143    }
144
145    /// Process audio through pipeline
146    pub async fn process(&self, input: &[f32]) -> Result<Vec<f32>> {
147        let mut output = input.to_vec();
148
149        if self.config.parallel && self.stages.len() > 1 {
150            // Parallel processing for independent stages
151            for stage in &self.stages {
152                if stage.can_run_parallel() {
153                    output = stage.process(&output).await?;
154                }
155            }
156        } else {
157            // Sequential processing
158            for stage in &self.stages {
159                output = stage.process(&output).await?;
160            }
161        }
162
163        Ok(output)
164    }
165
166    /// Get pipeline latency estimate
167    pub fn estimated_latency_ms(&self, sample_rate: u32) -> f32 {
168        self.stages
169            .iter()
170            .map(|stage| stage.estimated_latency_ms(sample_rate))
171            .sum()
172    }
173}
174
175impl Default for ProcessingPipeline {
176    fn default() -> Self {
177        Self::new()
178    }
179}
180
/// Individual processing stage in a [`ProcessingPipeline`].
#[derive(Debug, Clone)]
pub struct ProcessingStage {
    /// Human-readable stage name (used in trace logging)
    pub name: String,
    /// Stage type, selects the processing algorithm
    pub stage_type: StageType,
    /// Stage parameters, keyed by name (e.g. "cutoff", "ratio", "threshold")
    pub parameters: std::collections::HashMap<String, f32>,
    /// Whether this stage may run in a parallel pipeline
    pub parallel_capable: bool,
}

/// Types of processing stages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StageType {
    /// Peak normalization to a target level
    Normalize,
    /// Threshold-based noise attenuation
    NoiseReduction,
    /// One-pole low-pass filtering
    Filter,
    /// Nearest-neighbor resampling by a ratio
    Resample,
    /// Dynamic range compression above a threshold
    Compression,
    /// Custom processing identified by name (currently a pass-through)
    Custom(String),
}

211impl ProcessingStage {
212    /// Create new stage
213    pub fn new(name: String, stage_type: StageType) -> Self {
214        Self {
215            name,
216            stage_type,
217            parameters: std::collections::HashMap::new(),
218            parallel_capable: true,
219        }
220    }
221
222    /// Set parameter
223    pub fn with_parameter(mut self, key: String, value: f32) -> Self {
224        self.parameters.insert(key, value);
225        self
226    }
227
228    /// Set parallel capability
229    pub fn with_parallel(mut self, parallel: bool) -> Self {
230        self.parallel_capable = parallel;
231        self
232    }
233
234    /// Check if stage can run in parallel
235    pub fn can_run_parallel(&self) -> bool {
236        self.parallel_capable
237    }
238
239    /// Process audio in this stage
240    pub async fn process(&self, input: &[f32]) -> Result<Vec<f32>> {
241        trace!(
242            "Processing stage: {} with {} samples",
243            self.name,
244            input.len()
245        );
246
247        match self.stage_type {
248            StageType::Normalize => self.normalize(input),
249            StageType::NoiseReduction => self.noise_reduction(input),
250            StageType::Filter => self.filter(input),
251            StageType::Resample => self.resample(input),
252            StageType::Compression => self.compression(input),
253            StageType::Custom(_) => {
254                // Custom processing - placeholder
255                Ok(input.to_vec())
256            }
257        }
258    }
259
260    /// Estimate processing latency
261    pub fn estimated_latency_ms(&self, _sample_rate: u32) -> f32 {
262        match self.stage_type {
263            StageType::Normalize => 0.1,
264            StageType::NoiseReduction => 2.0,
265            StageType::Filter => 0.5,
266            StageType::Resample => 1.0,
267            StageType::Compression => 0.3,
268            StageType::Custom(_) => 1.0,
269        }
270    }
271
272    // Stage-specific processing methods
273
274    fn normalize(&self, input: &[f32]) -> Result<Vec<f32>> {
275        if input.is_empty() {
276            return Ok(input.to_vec());
277        }
278
279        let max_val = input
280            .iter()
281            .map(|x| x.abs())
282            .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
283            .unwrap_or(1.0);
284        if max_val == 0.0 {
285            return Ok(input.to_vec());
286        }
287
288        let target_level = self.parameters.get("target_level").copied().unwrap_or(0.9);
289        let scale = target_level / max_val;
290
291        Ok(input.iter().map(|x| x * scale).collect())
292    }
293
294    fn noise_reduction(&self, input: &[f32]) -> Result<Vec<f32>> {
295        let noise_threshold = self
296            .parameters
297            .get("noise_threshold")
298            .copied()
299            .unwrap_or(0.01);
300
301        Ok(input
302            .iter()
303            .map(|&x| {
304                if x.abs() < noise_threshold {
305                    x * 0.1 // Reduce low-level noise
306                } else {
307                    x
308                }
309            })
310            .collect())
311    }
312
313    fn filter(&self, input: &[f32]) -> Result<Vec<f32>> {
314        let cutoff = self.parameters.get("cutoff").copied().unwrap_or(0.5);
315
316        // Simple low-pass filter
317        let mut output = Vec::with_capacity(input.len());
318        let mut prev = 0.0;
319
320        for &sample in input {
321            let filtered = prev + cutoff * (sample - prev);
322            output.push(filtered);
323            prev = filtered;
324        }
325
326        Ok(output)
327    }
328
329    fn resample(&self, input: &[f32]) -> Result<Vec<f32>> {
330        let ratio = self.parameters.get("ratio").copied().unwrap_or(1.0);
331
332        if ratio == 1.0 {
333            return Ok(input.to_vec());
334        }
335
336        let output_len = (input.len() as f32 * ratio) as usize;
337        let mut output = Vec::with_capacity(output_len);
338
339        for i in 0..output_len {
340            let src_idx = (i as f32 / ratio) as usize;
341            if src_idx < input.len() {
342                output.push(input[src_idx]);
343            } else {
344                output.push(0.0);
345            }
346        }
347
348        Ok(output)
349    }
350
351    fn compression(&self, input: &[f32]) -> Result<Vec<f32>> {
352        let ratio = self.parameters.get("ratio").copied().unwrap_or(4.0);
353        let threshold = self.parameters.get("threshold").copied().unwrap_or(0.7);
354
355        Ok(input
356            .iter()
357            .map(|&x| {
358                let abs_x = x.abs();
359                if abs_x > threshold {
360                    let excess = abs_x - threshold;
361                    let compressed_excess = excess / ratio;
362                    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
363                    sign * (threshold + compressed_excess)
364                } else {
365                    x
366                }
367            })
368            .collect())
369    }
370}
371
/// Feature extractor for audio analysis.
///
/// Computes spectral, temporal and prosodic features at a fixed
/// processing sample rate (input audio is resampled as needed).
pub struct FeatureExtractor {
    /// Sample rate all analysis runs at, in Hz
    sample_rate: u32,
    /// FFT planner
    /// NOTE(review): currently unused — `compute_fft` builds its own
    /// planner per call; confirm whether this field should be used instead.
    #[allow(dead_code)]
    fft_planner: RealFftPlanner<f32>,
    /// Feature cache
    /// NOTE(review): never read or written in the visible code.
    cache: std::collections::HashMap<String, AudioFeatures>,
}

383impl std::fmt::Debug for FeatureExtractor {
384    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
385        f.debug_struct("FeatureExtractor")
386            .field("sample_rate", &self.sample_rate)
387            .field("cache", &self.cache)
388            .finish()
389    }
390}
391
392impl FeatureExtractor {
393    /// Create new feature extractor
394    pub fn new(sample_rate: u32) -> Self {
395        Self {
396            sample_rate,
397            fft_planner: RealFftPlanner::<f32>::new(),
398            cache: std::collections::HashMap::new(),
399        }
400    }
401
402    /// Extract comprehensive audio features
403    pub async fn extract_features(&self, audio: &[f32], sample_rate: u32) -> Result<AudioFeatures> {
404        debug!(
405            "Extracting features from {} samples at {} Hz",
406            audio.len(),
407            sample_rate
408        );
409
410        // Resample if necessary
411        let processed_audio = if sample_rate != self.sample_rate {
412            self.resample_audio(audio, sample_rate, self.sample_rate)?
413        } else {
414            audio.to_vec()
415        };
416
417        // Extract different feature types
418        let spectral = self.extract_spectral_features(&processed_audio)?;
419        let temporal = self.extract_temporal_features(&processed_audio)?;
420        let prosodic = self.extract_prosodic_features(&processed_audio)?;
421        let speaker_embedding = None; // Would require neural network
422
423        Ok(AudioFeatures {
424            spectral,
425            temporal,
426            prosodic,
427            speaker_embedding,
428            quality: Vec::new(),   // Placeholder for quality features
429            formants: Vec::new(),  // Placeholder for formant features
430            harmonics: Vec::new(), // Placeholder for harmonic features
431        })
432    }
433
434    /// Extract spectral features
435    fn extract_spectral_features(&self, audio: &[f32]) -> Result<Vec<f32>> {
436        let mut features = Vec::new();
437
438        // Window parameters
439        let window_size = 1024;
440        let hop_size = 512;
441
442        if audio.len() < window_size {
443            return Ok(vec![0.0; 13]); // Return zero features for short audio
444        }
445
446        // Process windows
447        let mut spectral_centroids = Vec::new();
448        let mut spectral_rolloffs = Vec::new();
449        let mut mfccs = Vec::new();
450
451        for window_start in (0..audio.len() - window_size).step_by(hop_size) {
452            let window = &audio[window_start..window_start + window_size];
453
454            // Apply window function (Hann window)
455            let windowed: Vec<f32> = window
456                .iter()
457                .enumerate()
458                .map(|(i, &x)| {
459                    let hann = 0.5
460                        - 0.5
461                            * (2.0 * std::f32::consts::PI * i as f32 / (window_size - 1) as f32)
462                                .cos();
463                    x * hann
464                })
465                .collect();
466
467            // Compute FFT
468            let spectrum = self.compute_fft(&windowed)?;
469
470            // Extract spectral features
471            spectral_centroids.push(self.compute_spectral_centroid(&spectrum));
472            spectral_rolloffs.push(self.compute_spectral_rolloff(&spectrum, 0.85));
473
474            // Compute MFCCs (simplified)
475            let mel_spectrum = self.compute_mel_spectrum(&spectrum, 13);
476            mfccs.extend(mel_spectrum);
477        }
478
479        // Aggregate features
480        features.push(self.mean(&spectral_centroids)); // Spectral centroid mean
481        features.push(self.std(&spectral_centroids)); // Spectral centroid std
482        features.push(self.mean(&spectral_rolloffs)); // Spectral rolloff mean
483        features.push(self.std(&spectral_rolloffs)); // Spectral rolloff std
484
485        // Add MFCC statistics (first 13 coefficients)
486        if !mfccs.is_empty() {
487            let chunk_size = 13;
488            for i in 0..chunk_size {
489                let coeff_values: Vec<f32> =
490                    mfccs.iter().skip(i).step_by(chunk_size).copied().collect();
491                features.push(self.mean(&coeff_values));
492            }
493        } else {
494            features.extend(vec![0.0; 13]);
495        }
496
497        Ok(features)
498    }
499
500    /// Extract temporal features
501    fn extract_temporal_features(&self, audio: &[f32]) -> Result<Vec<f32>> {
502        let mut features = Vec::new();
503
504        // RMS energy
505        let rms = (audio.iter().map(|x| x * x).sum::<f32>() / audio.len() as f32).sqrt();
506        features.push(rms);
507
508        // Zero crossing rate
509        let zcr = audio
510            .windows(2)
511            .filter(|w| (w[0] > 0.0) != (w[1] > 0.0))
512            .count() as f32
513            / (audio.len() - 1) as f32;
514        features.push(zcr);
515
516        // Energy contour statistics
517        let frame_size = self.sample_rate as usize / 100; // 10ms frames
518        let mut energy_contour = Vec::new();
519
520        for chunk in audio.chunks(frame_size) {
521            let energy = chunk.iter().map(|x| x * x).sum::<f32>() / chunk.len() as f32;
522            energy_contour.push(energy.sqrt());
523        }
524
525        features.push(self.mean(&energy_contour));
526        features.push(self.std(&energy_contour));
527
528        // Spectral flux (simplified)
529        let spectral_flux = self.compute_spectral_flux(audio)?;
530        features.push(spectral_flux);
531
532        Ok(features)
533    }
534
535    /// Extract prosodic features
536    fn extract_prosodic_features(&self, audio: &[f32]) -> Result<Vec<f32>> {
537        let mut features = Vec::new();
538
539        // Fundamental frequency estimation (simplified autocorrelation)
540        let f0_values = self.estimate_f0_contour(audio)?;
541
542        if !f0_values.is_empty() {
543            features.push(self.mean(&f0_values)); // Mean F0
544            features.push(self.std(&f0_values)); // F0 variance
545            features.push(f0_values.iter().copied().reduce(f32::max).unwrap_or(0.0)); // Max F0
546            features.push(f0_values.iter().copied().reduce(f32::min).unwrap_or(0.0));
547        // Min F0
548        } else {
549            features.extend(vec![0.0; 4]);
550        }
551
552        // Intensity contour
553        let intensity_values = self.compute_intensity_contour(audio);
554        features.push(self.mean(&intensity_values));
555        features.push(self.std(&intensity_values));
556
557        // Speaking rate estimate (simplified)
558        let speaking_rate = self.estimate_speaking_rate(audio)?;
559        features.push(speaking_rate);
560
561        Ok(features)
562    }
563
564    // Helper methods for feature extraction
565
566    fn resample_audio(&self, audio: &[f32], from_rate: u32, to_rate: u32) -> Result<Vec<f32>> {
567        if from_rate == to_rate {
568            return Ok(audio.to_vec());
569        }
570
571        let ratio = to_rate as f32 / from_rate as f32;
572        let output_len = (audio.len() as f32 * ratio) as usize;
573        let mut output = Vec::with_capacity(output_len);
574
575        for i in 0..output_len {
576            let src_idx = i as f32 / ratio;
577            let idx = src_idx as usize;
578
579            if idx + 1 < audio.len() {
580                // Linear interpolation
581                let frac = src_idx - idx as f32;
582                let sample = audio[idx] * (1.0 - frac) + audio[idx + 1] * frac;
583                output.push(sample);
584            } else if idx < audio.len() {
585                output.push(audio[idx]);
586            } else {
587                output.push(0.0);
588            }
589        }
590
591        Ok(output)
592    }
593
594    fn compute_fft(&self, audio: &[f32]) -> Result<Vec<f32>> {
595        let mut planner = RealFftPlanner::<f32>::new();
596        let fft = planner.plan_fft_forward(audio.len());
597
598        let input = audio.to_vec();
599        let mut output = vec![Complex::new(0.0, 0.0); audio.len() / 2 + 1];
600
601        fft.process(&input, &mut output);
602
603        Ok(output.iter().map(|c| c.norm()).collect())
604    }
605
606    fn compute_spectral_centroid(&self, spectrum: &[f32]) -> f32 {
607        let mut weighted_sum = 0.0;
608        let mut magnitude_sum = 0.0;
609
610        for (i, &magnitude) in spectrum.iter().enumerate() {
611            let freq = i as f32 * self.sample_rate as f32 / (2.0 * spectrum.len() as f32);
612            weighted_sum += freq * magnitude;
613            magnitude_sum += magnitude;
614        }
615
616        if magnitude_sum > 0.0 {
617            weighted_sum / magnitude_sum
618        } else {
619            0.0
620        }
621    }
622
623    fn compute_spectral_rolloff(&self, spectrum: &[f32], rolloff_point: f32) -> f32 {
624        let total_energy: f32 = spectrum.iter().map(|x| x * x).sum();
625        let target_energy = total_energy * rolloff_point;
626
627        let mut cumulative_energy = 0.0;
628        for (i, &magnitude) in spectrum.iter().enumerate() {
629            cumulative_energy += magnitude * magnitude;
630            if cumulative_energy >= target_energy {
631                return i as f32 * self.sample_rate as f32 / (2.0 * spectrum.len() as f32);
632            }
633        }
634
635        (spectrum.len() - 1) as f32 * self.sample_rate as f32 / (2.0 * spectrum.len() as f32)
636    }
637
638    fn compute_mel_spectrum(&self, spectrum: &[f32], num_coeffs: usize) -> Vec<f32> {
639        // Simplified mel-scale computation
640        let mut mel_spectrum = vec![0.0; num_coeffs];
641        let mel_low = self.hz_to_mel(0.0);
642        let mel_high = self.hz_to_mel(self.sample_rate as f32 / 2.0);
643
644        for (i, mel_value) in mel_spectrum.iter_mut().enumerate().take(num_coeffs) {
645            let mel_center = mel_low + (mel_high - mel_low) * i as f32 / (num_coeffs - 1) as f32;
646            let hz_center = self.mel_to_hz(mel_center);
647            let bin_center = hz_center * spectrum.len() as f32 * 2.0 / self.sample_rate as f32;
648
649            let start_bin = (bin_center - 1.0).max(0.0) as usize;
650            let end_bin = ((bin_center + 1.0) as usize).min(spectrum.len() - 1);
651
652            for j in start_bin..=end_bin {
653                if j < spectrum.len() {
654                    *mel_value += spectrum[j];
655                }
656            }
657        }
658
659        // Apply DCT for MFCC
660        self.apply_dct(&mel_spectrum)
661    }
662
663    fn hz_to_mel(&self, hz: f32) -> f32 {
664        2595.0 * (1.0 + hz / 700.0).log10()
665    }
666
667    fn mel_to_hz(&self, mel: f32) -> f32 {
668        700.0 * (10.0_f32.powf(mel / 2595.0) - 1.0)
669    }
670
671    fn apply_dct(&self, input: &[f32]) -> Vec<f32> {
672        let n = input.len();
673        let mut output = vec![0.0; n];
674
675        for (k, output_value) in output.iter_mut().enumerate().take(n) {
676            let mut sum = 0.0;
677            for (i, &input_value) in input.iter().enumerate().take(n) {
678                sum += input_value
679                    * (std::f32::consts::PI * k as f32 * (i as f32 + 0.5) / n as f32).cos();
680            }
681            *output_value = sum;
682        }
683
684        output
685    }
686
687    fn compute_spectral_flux(&self, audio: &[f32]) -> Result<f32> {
688        // Simplified spectral flux computation
689        let window_size = 1024;
690        let hop_size = 512;
691
692        if audio.len() < window_size * 2 {
693            return Ok(0.0);
694        }
695
696        let mut flux_values = Vec::new();
697
698        for i in (hop_size..audio.len() - window_size).step_by(hop_size) {
699            let window1 = &audio[i - hop_size..i - hop_size + window_size];
700            let window2 = &audio[i..i + window_size];
701
702            let spectrum1 = self.compute_fft(window1)?;
703            let spectrum2 = self.compute_fft(window2)?;
704
705            let flux: f32 = spectrum1
706                .iter()
707                .zip(spectrum2.iter())
708                .map(|(s1, s2)| (s2 - s1).max(0.0))
709                .sum();
710
711            flux_values.push(flux);
712        }
713
714        Ok(self.mean(&flux_values))
715    }
716
717    fn estimate_f0_contour(&self, audio: &[f32]) -> Result<Vec<f32>> {
718        let frame_size = self.sample_rate as usize / 100; // 10ms frames
719        let mut f0_values = Vec::new();
720
721        for chunk in audio.chunks(frame_size) {
722            if chunk.len() < frame_size / 2 {
723                continue;
724            }
725
726            let f0 = self.estimate_f0_autocorrelation(chunk);
727            f0_values.push(f0);
728        }
729
730        Ok(f0_values)
731    }
732
733    fn estimate_f0_autocorrelation(&self, frame: &[f32]) -> f32 {
734        let min_period = self.sample_rate / 500; // 500 Hz max
735        let max_period = self.sample_rate / 50; // 50 Hz min
736
737        let mut max_correlation = 0.0;
738        let mut best_period = min_period;
739
740        for period in min_period..max_period.min(frame.len() as u32 / 2) {
741            let mut correlation = 0.0;
742            let period_samples = period as usize;
743
744            for i in 0..(frame.len() - period_samples) {
745                correlation += frame[i] * frame[i + period_samples];
746            }
747
748            if correlation > max_correlation {
749                max_correlation = correlation;
750                best_period = period;
751            }
752        }
753
754        if max_correlation > 0.0 {
755            self.sample_rate as f32 / best_period as f32
756        } else {
757            0.0
758        }
759    }
760
761    fn compute_intensity_contour(&self, audio: &[f32]) -> Vec<f32> {
762        let frame_size = self.sample_rate as usize / 100; // 10ms frames
763        let mut intensity_values = Vec::new();
764
765        for chunk in audio.chunks(frame_size) {
766            let intensity = chunk.iter().map(|x| x * x).sum::<f32>() / chunk.len() as f32;
767            intensity_values.push(intensity.sqrt());
768        }
769
770        intensity_values
771    }
772
773    fn estimate_speaking_rate(&self, audio: &[f32]) -> Result<f32> {
774        // Simple syllable counting based on energy peaks
775        let intensity = self.compute_intensity_contour(audio);
776        let threshold = self.mean(&intensity) * 1.2;
777
778        let mut peak_count = 0;
779        let mut in_peak = false;
780
781        for &value in &intensity {
782            if value > threshold && !in_peak {
783                peak_count += 1;
784                in_peak = true;
785            } else if value <= threshold {
786                in_peak = false;
787            }
788        }
789
790        let duration_seconds = audio.len() as f32 / self.sample_rate as f32;
791        Ok(peak_count as f32 / duration_seconds * 60.0) // Peaks per minute
792    }
793
794    fn mean(&self, values: &[f32]) -> f32 {
795        if values.is_empty() {
796            0.0
797        } else {
798            values.iter().sum::<f32>() / values.len() as f32
799        }
800    }
801
802    fn std(&self, values: &[f32]) -> f32 {
803        if values.len() < 2 {
804            return 0.0;
805        }
806
807        let mean = self.mean(values);
808        let variance =
809            values.iter().map(|x| (x - mean).powi(2)).sum::<f32>() / (values.len() - 1) as f32;
810
811        variance.sqrt()
812    }
813}
814
/// Signal processor for audio manipulation.
///
/// Stateless per call; all methods take input slices and return new
/// vectors.
#[derive(Debug)]
pub struct SignalProcessor {
    /// Buffer size for processing
    /// NOTE(review): stored but not consulted by any visible method.
    #[allow(dead_code)]
    buffer_size: usize,
    /// Processing cache
    /// NOTE(review): never read or written in the visible code.
    #[allow(dead_code)]
    cache: std::collections::HashMap<String, Vec<f32>>,
}

826impl SignalProcessor {
827    /// Create new signal processor
828    pub fn new(buffer_size: usize) -> Self {
829        Self {
830            buffer_size,
831            cache: std::collections::HashMap::new(),
832        }
833    }
834
835    /// Normalize audio to target level
836    pub fn normalize(&self, audio: &[f32]) -> Result<Vec<f32>> {
837        if audio.is_empty() {
838            return Ok(audio.to_vec());
839        }
840
841        let max_val = audio
842            .iter()
843            .map(|x| x.abs())
844            .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
845            .unwrap_or(1.0);
846        if max_val == 0.0 {
847            return Ok(audio.to_vec());
848        }
849
850        let scale = 0.95 / max_val;
851        Ok(audio.iter().map(|x| x * scale).collect())
852    }
853
854    /// Apply noise reduction
855    pub fn denoise(&self, audio: &[f32], _sample_rate: u32) -> Result<Vec<f32>> {
856        // Simple spectral gating
857        let noise_threshold = 0.02;
858
859        Ok(audio
860            .iter()
861            .map(|&x| {
862                if x.abs() < noise_threshold {
863                    x * 0.1
864                } else {
865                    x
866                }
867            })
868            .collect())
869    }
870
871    /// Resample audio
872    pub fn resample(&self, audio: &[f32], from_rate: u32, to_rate: u32) -> Result<Vec<f32>> {
873        if from_rate == to_rate {
874            return Ok(audio.to_vec());
875        }
876
877        let ratio = to_rate as f32 / from_rate as f32;
878        let output_len = (audio.len() as f32 * ratio) as usize;
879        let mut output = Vec::with_capacity(output_len);
880
881        for i in 0..output_len {
882            let src_idx = i as f32 / ratio;
883            let idx = src_idx as usize;
884
885            if idx + 1 < audio.len() {
886                let frac = src_idx - idx as f32;
887                let sample = audio[idx] * (1.0 - frac) + audio[idx + 1] * frac;
888                output.push(sample);
889            } else if idx < audio.len() {
890                output.push(audio[idx]);
891            } else {
892                output.push(0.0);
893            }
894        }
895
896        Ok(output)
897    }
898
899    /// Apply smoothing filter
900    pub fn smooth(&self, audio: &[f32]) -> Result<Vec<f32>> {
901        if audio.len() < 3 {
902            return Ok(audio.to_vec());
903        }
904
905        let mut output = Vec::with_capacity(audio.len());
906        output.push(audio[0]);
907
908        for i in 1..audio.len() - 1 {
909            let smoothed = (audio[i - 1] + 2.0 * audio[i] + audio[i + 1]) / 4.0;
910            output.push(smoothed);
911        }
912
913        output.push(audio[audio.len() - 1]);
914        Ok(output)
915    }
916
917    /// Apply dynamic range compression
918    pub fn compress(&self, audio: &[f32], ratio: f32) -> Result<Vec<f32>> {
919        let threshold = 0.7;
920
921        Ok(audio
922            .iter()
923            .map(|&x| {
924                let abs_x = x.abs();
925                if abs_x > threshold {
926                    let excess = abs_x - threshold;
927                    let compressed_excess = excess / ratio;
928                    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
929                    sign * (threshold + compressed_excess)
930                } else {
931                    x
932                }
933            })
934            .collect())
935    }
936}