1#![allow(clippy::cast_precision_loss)] #![allow(clippy::cast_possible_truncation)] #![allow(clippy::cast_sign_loss)] #![allow(clippy::missing_errors_doc)] #![allow(clippy::missing_panics_doc)] #![allow(clippy::unused_self)] #![allow(clippy::must_use_candidate)] #![allow(clippy::doc_markdown)] #![allow(clippy::unnecessary_wraps)] #![allow(clippy::float_cmp)] #![allow(clippy::match_same_arms)] #![allow(clippy::module_name_repetitions)] #![allow(clippy::struct_excessive_bools)] #![allow(clippy::too_many_lines)] #![allow(clippy::needless_pass_by_value)] #![allow(clippy::similar_names)] #![allow(clippy::unused_async)] #![allow(clippy::needless_range_loop)] #![allow(clippy::uninlined_format_args)] #![allow(clippy::manual_clamp)] #![allow(clippy::return_self_not_must_use)] #![allow(clippy::cast_possible_wrap)] #![allow(clippy::cast_lossless)] #![allow(clippy::wildcard_imports)] #![allow(clippy::format_push_string)] #![allow(clippy::redundant_closure_for_method_calls)] use async_trait::async_trait;
62use futures::Stream;
63use serde::{Deserialize, Serialize};
64use std::collections::HashMap;
65use thiserror::Error;
66
/// Crate-wide result alias specialized to [`VocoderError`].
pub type Result<T> = std::result::Result<T, VocoderError>;
69
/// Error type for all vocoder operations in this crate.
///
/// Message-carrying variants wrap a human-readable description; the
/// `#[from]` variants convert automatically from their source errors
/// via the `?` operator.
#[derive(Error, Debug)]
pub enum VocoderError {
    /// Failure during mel-to-waveform synthesis itself.
    #[error("Vocoding failed: {0}")]
    VocodingError(String),

    /// Failure while loading or locating a vocoder model.
    #[error("Model loading failed: {0}")]
    ModelError(String),

    /// Caller-supplied input was rejected.
    #[error("Invalid input: {0}")]
    InputError(String),

    /// A mel spectrogram failed validation.
    #[error("Invalid mel spectrogram: {0}")]
    InvalidMelSpectrogram(String),

    /// Configuration problem (long-form variant).
    // NOTE(review): overlaps with `ConfigError` below — candidates for
    // consolidation in a future breaking release; confirm which callers
    // match on each before merging.
    #[error("Configuration error: {0}")]
    ConfigurationError(String),

    /// Configuration problem (short-form variant; used by `VocoderManager`
    /// when no default vocoder is set).
    #[error("Config error: {0}")]
    ConfigError(String),

    /// Error raised by the streaming pipeline.
    #[error("Streaming error: {0}")]
    StreamingError(String),

    /// Generic runtime failure.
    #[error("Runtime error: {0}")]
    RuntimeError(String),

    /// Error during audio processing.
    #[error("Processing error: {0}")]
    ProcessingError(String),

    /// Catch-all for errors that fit no other variant.
    #[error("Other error: {0}")]
    Other(String),

    /// Error propagated from the Candle tensor backend (feature-gated).
    #[cfg(feature = "candle")]
    #[error("Candle error: {0}")]
    CandleError(#[from] candle_core::Error),

    /// I/O error (file access, model reads, etc.).
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// FFT error propagated from the `scirs2_fft` crate.
    #[error("FFT error: {0}")]
    FFTError(#[from] scirs2_fft::FFTError),
}
113
/// Supported language/locale identifiers (BCP 47-style codes; see
/// [`LanguageCode::as_str`] for the canonical string form).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum LanguageCode {
    /// English (United States) — "en-US".
    EnUs,
    /// English (United Kingdom) — "en-GB".
    EnGb,
    /// Japanese — "ja".
    Ja,
    /// Chinese (Simplified, China) — "zh-CN".
    ZhCn,
    /// Korean — "ko".
    Ko,
    /// German — "de".
    De,
    /// French — "fr".
    Fr,
    /// Spanish — "es".
    Es,
}
134
135impl LanguageCode {
136 pub fn as_str(&self) -> &'static str {
138 match self {
139 LanguageCode::EnUs => "en-US",
140 LanguageCode::EnGb => "en-GB",
141 LanguageCode::Ja => "ja",
142 LanguageCode::ZhCn => "zh-CN",
143 LanguageCode::Ko => "ko",
144 LanguageCode::De => "de",
145 LanguageCode::Fr => "fr",
146 LanguageCode::Es => "es",
147 }
148 }
149}
150
/// Mel spectrogram container used as vocoder input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MelSpectrogram {
    /// Spectrogram values indexed as `data[mel_band][frame]`.
    pub data: Vec<Vec<f32>>,
    /// Number of mel frequency bands (rows of `data`).
    pub n_mels: usize,
    /// Number of time frames (columns of `data`).
    pub n_frames: usize,
    /// Sample rate in Hz of the audio this spectrogram describes.
    pub sample_rate: u32,
    /// Hop length in samples between consecutive frames.
    pub hop_length: u32,
}
165
166impl MelSpectrogram {
167 pub fn new(data: Vec<Vec<f32>>, sample_rate: u32, hop_length: u32) -> Self {
169 let n_mels = data.len();
170 let n_frames = data.first().map_or(0, |row| row.len());
171
172 Self {
173 data,
174 n_mels,
175 n_frames,
176 sample_rate,
177 hop_length,
178 }
179 }
180
181 pub fn duration(&self) -> f32 {
183 (self.n_frames as u32 * self.hop_length) as f32 / self.sample_rate as f32
184 }
185}
186
/// Per-request synthesis controls passed to [`Vocoder::vocode`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisConfig {
    /// Playback speed factor (1.0 = normal speed).
    pub speed: f32,
    /// Pitch shift amount (0.0 = unshifted; units not defined here —
    /// presumably semitones, TODO confirm against backend usage).
    pub pitch_shift: f32,
    /// Energy/volume scale (1.0 = unchanged).
    pub energy: f32,
    /// Optional speaker identity for multi-speaker models.
    pub speaker_id: Option<u32>,
    /// Optional RNG seed for reproducible synthesis.
    pub seed: Option<u64>,
}
201
202impl Default for SynthesisConfig {
203 fn default() -> Self {
204 Self {
205 speed: 1.0,
206 pitch_shift: 0.0,
207 energy: 1.0,
208 speaker_id: None,
209 seed: None,
210 }
211 }
212}
213
/// In-memory PCM audio: interleaved `f32` samples plus format metadata.
#[derive(Debug, Clone)]
pub struct AudioBuffer {
    samples: Vec<f32>,
    sample_rate: u32,
    channels: u32,
}

impl AudioBuffer {
    /// Wraps raw interleaved samples with their format description.
    pub fn new(samples: Vec<f32>, sample_rate: u32, channels: u32) -> Self {
        Self {
            samples,
            sample_rate,
            channels,
        }
    }

    /// All-zero buffer covering `duration` seconds.
    pub fn silence(duration: f32, sample_rate: u32, channels: u32) -> Self {
        let num_samples = (duration * sample_rate as f32 * channels as f32) as usize;
        Self::new(vec![0.0; num_samples], sample_rate, channels)
    }

    /// Mono buffer from raw samples; `sample_rate` is truncated to `u32`.
    pub fn from_samples(samples: Vec<f32>, sample_rate: f32) -> Self {
        Self::new(samples, sample_rate as u32, 1)
    }

    /// Mono sine wave of the given frequency, duration, and amplitude.
    pub fn sine_wave(frequency: f32, duration: f32, sample_rate: u32, amplitude: f32) -> Self {
        let num_samples = (duration * sample_rate as f32) as usize;
        let samples: Vec<f32> = (0..num_samples)
            .map(|i| {
                let t = i as f32 / sample_rate as f32;
                amplitude * (2.0 * std::f32::consts::PI * frequency * t).sin()
            })
            .collect();

        Self::new(samples, sample_rate, 1)
    }

    /// Duration in seconds: `len / (sample_rate * channels)`.
    pub fn duration(&self) -> f32 {
        self.samples.len() as f32 / (self.sample_rate * self.channels) as f32
    }

    /// Sample rate in Hz.
    pub fn sample_rate(&self) -> u32 {
        self.sample_rate
    }

    /// Number of interleaved channels.
    pub fn channels(&self) -> u32 {
        self.channels
    }

    /// True when the buffer holds no samples.
    pub fn is_empty(&self) -> bool {
        self.samples.is_empty()
    }

    /// Read-only view of the interleaved samples.
    pub fn samples(&self) -> &[f32] {
        self.samples.as_slice()
    }

    /// Mutable view of the interleaved samples.
    pub fn samples_mut(&mut self) -> &mut [f32] {
        self.samples.as_mut_slice()
    }

    /// Total sample count across all channels.
    pub fn len(&self) -> usize {
        self.samples.len()
    }

    /// Largest absolute sample value (0.0 for an empty buffer).
    pub fn peak_amplitude(&self) -> f32 {
        self.samples
            .iter()
            .fold(0.0_f32, |peak, &s| peak.max(s.abs()))
    }

    /// Scales all samples so the peak equals `target_peak`.
    /// A silent buffer is left untouched (avoids division by zero).
    pub fn normalize_to_peak(&mut self, target_peak: f32) {
        let current_peak = self.peak_amplitude();
        if current_peak > 0.0 {
            let gain = target_peak / current_peak;
            self.samples.iter_mut().for_each(|s| *s *= gain);
        }
    }
}
311
/// Optional capabilities a [`Vocoder`] implementation may advertise
/// via [`Vocoder::supports`].
// NOTE(review): several variants look like long/short aliases
// (`EmotionConditioning`/`Emotion`, `SingingVoice`/`Singing`,
// `SpatialAudio`/`Spatial`) — confirm caller usage before consolidating.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum VocoderFeature {
    /// Baseline mel-to-audio synthesis.
    Base,
    /// Incremental synthesis from a stream of mel chunks.
    StreamingInference,
    /// Synthesis of multiple spectrograms in one call.
    BatchProcessing,
    /// GPU-accelerated inference.
    GpuAcceleration,
    /// High-quality synthesis mode.
    HighQuality,
    /// Real-time capable processing.
    RealtimeProcessing,
    /// Speed-optimized inference.
    FastInference,
    /// Emotion-conditioned synthesis.
    EmotionConditioning,
    /// Emotion support (short alias; see NOTE above).
    Emotion,
    /// Voice conversion support.
    VoiceConversion,
    /// Age transformation of the voice.
    AgeTransformation,
    /// Gender transformation of the voice.
    GenderTransformation,
    /// Voice morphing support.
    VoiceMorphing,
    /// Singing voice synthesis.
    SingingVoice,
    /// Singing support (short alias; see NOTE above).
    Singing,
    /// Spatial audio rendering.
    SpatialAudio,
    /// Spatial support (short alias; see NOTE above).
    Spatial,
}
350
/// Descriptive information reported by a [`Vocoder`] implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VocoderMetadata {
    /// Human-readable vocoder name.
    pub name: String,
    /// Implementation version string.
    pub version: String,
    /// Model architecture label (e.g. "HiFi-GAN", "Sine Wave").
    pub architecture: String,
    /// Output sample rate in Hz.
    pub sample_rate: u32,
    /// Expected number of mel channels in the input.
    pub mel_channels: u32,
    /// Typical processing latency in milliseconds.
    pub latency_ms: f32,
    /// Subjective quality score — scale not defined here; TODO confirm
    /// the intended range against the backend implementations.
    pub quality_score: f32,
}
369
/// Core async interface implemented by every mel-to-waveform backend.
///
/// Implementations must be `Send + Sync` so they can be shared across
/// async tasks (e.g. boxed inside `VocoderManager`).
#[async_trait]
pub trait Vocoder: Send + Sync {
    /// Converts one mel spectrogram into audio.
    ///
    /// `config` optionally adjusts synthesis (speed/pitch/energy/speaker);
    /// `None` means implementation defaults.
    ///
    /// # Errors
    /// Returns a [`VocoderError`] when synthesis fails.
    async fn vocode(
        &self,
        mel: &MelSpectrogram,
        config: Option<&SynthesisConfig>,
    ) -> Result<AudioBuffer>;

    /// Converts a stream of mel spectrograms into a stream of audio
    /// buffers; the same `config` applies to every chunk.
    ///
    /// # Errors
    /// Returns a [`VocoderError`] when the stream cannot be set up;
    /// per-chunk failures surface as `Err` items in the output stream.
    async fn vocode_stream(
        &self,
        mel_stream: Box<dyn Stream<Item = MelSpectrogram> + Send + Unpin>,
        config: Option<&SynthesisConfig>,
    ) -> Result<Box<dyn Stream<Item = Result<AudioBuffer>> + Send + Unpin>>;

    /// Vocodes a batch of spectrograms; when `configs` is provided,
    /// `configs[i]` applies to `mels[i]`.
    ///
    /// # Errors
    /// Returns a [`VocoderError`] when synthesis fails.
    async fn vocode_batch(
        &self,
        mels: &[MelSpectrogram],
        configs: Option<&[SynthesisConfig]>,
    ) -> Result<Vec<AudioBuffer>>;

    /// Static description of this vocoder (name, rates, quality).
    fn metadata(&self) -> VocoderMetadata;

    /// Whether this implementation advertises the given capability.
    fn supports(&self, feature: VocoderFeature) -> bool;
}
465
466pub mod adaptive_quality;
467pub mod audio;
468pub mod backends;
469pub mod broadcast_quality;
470pub mod cache;
471pub mod codecs;
472pub mod comprehensive_quality_metrics;
473pub mod conditioning;
474pub mod config;
475pub mod containers;
476pub mod conversion;
477pub mod drivers;
478pub mod effects;
479pub mod hifigan;
480pub mod loss;
481pub mod metrics;
482pub mod ml;
483pub mod models;
484pub mod optimization_paths;
485pub mod parallel;
486pub mod performance;
487pub mod post_processing;
488pub mod profiling;
489pub mod simd;
490pub mod streaming;
491pub mod utils;
492pub mod waveglow;
493
/// Convenience re-exports of the most commonly used crate items.
///
/// Glob-importing this module (`use …::prelude::*;`) brings the core
/// traits, types, and audio-processing helpers into scope in one line.
pub mod prelude {
    pub use crate::effects::AudioQualityMetrics;
    pub use crate::utils::audio_processing::{
        apply_adaptive_noise_gate, apply_formant_enhancement, apply_intelligent_agc,
        apply_psychoacoustic_masking, apply_stereo_widening, calculate_audio_quality_metrics,
        calculate_spectral_statistics, crossfade_audio, CrossfadeType, SpectralStatistics,
    };
    pub use crate::{
        adaptive_quality::{
            AdaptationStats, AdaptiveConfig, AdaptiveQualityController, PrecisionMode,
            QualityAdjustment, QualityTarget,
        },
        conditioning::{
            ConditioningConfigBuilder, EnhancementConfig, ProsodyConfig, SpeakerConfig,
            VocoderConditioner, VocoderConditioningConfig, VoiceCharacteristics,
        },
        conversion::{VoiceConversionConfig, VoiceConverter, VoiceMorpher},
        hifigan::{EmotionConfig, EmotionVocodingParams},
        performance::{
            PerformanceAlert, PerformanceMetrics, PerformanceMonitor, PerformanceStatistics,
            PerformanceThresholds,
        },
        AudioBuffer, DummyVocoder, HiFiGanVocoder, LanguageCode, MelSpectrogram, Result,
        SynthesisConfig, Vocoder, VocoderError, VocoderFeature, VocoderManager, VocoderMetadata,
    };
    // Re-exported so downstream crates can implement `Vocoder` without
    // depending on `async-trait` directly.
    pub use async_trait::async_trait;
}
522
523pub use hifigan::HiFiGanVocoder;
525pub use models::hifigan::{HiFiGanConfig, HiFiGanVariant, HiFiGanVariants};
526pub use streaming::{StreamHandle, StreamingPipeline, StreamingStats, StreamingVocoder};
527
/// Registry that owns multiple named [`Vocoder`] backends and routes
/// trait calls to a designated default.
pub struct VocoderManager {
    // Registered vocoders keyed by caller-supplied name.
    vocoders: HashMap<String, Box<dyn Vocoder>>,
    // Name of the vocoder used for delegated calls; set automatically
    // to the first vocoder added (see `add_vocoder`).
    default_vocoder: Option<String>,
}
535
536impl VocoderManager {
537 pub fn new() -> Self {
539 Self {
540 vocoders: HashMap::new(),
541 default_vocoder: None,
542 }
543 }
544
545 pub fn add_vocoder(&mut self, name: String, vocoder: Box<dyn Vocoder>) {
547 self.vocoders.insert(name.clone(), vocoder);
548
549 if self.default_vocoder.is_none() {
551 self.default_vocoder = Some(name);
552 }
553 }
554
555 pub fn set_default_vocoder(&mut self, name: String) {
557 if self.vocoders.contains_key(&name) {
558 self.default_vocoder = Some(name);
559 }
560 }
561
562 pub fn get_vocoder(&self, name: &str) -> Result<&dyn Vocoder> {
564 self.vocoders
565 .get(name)
566 .map(|v| v.as_ref())
567 .ok_or_else(|| VocoderError::ModelError(format!("Vocoder '{name}' not found")))
568 }
569
570 pub fn get_default_vocoder(&self) -> Result<&dyn Vocoder> {
572 let name = self
573 .default_vocoder
574 .as_ref()
575 .ok_or_else(|| VocoderError::ConfigError("No default vocoder set".to_string()))?;
576 self.get_vocoder(name)
577 }
578
579 pub fn list_vocoders(&self) -> Vec<&str> {
581 self.vocoders.keys().map(|s| s.as_str()).collect()
582 }
583}
584
impl Default for VocoderManager {
    /// Equivalent to [`VocoderManager::new`]: an empty registry with
    /// no default vocoder.
    fn default() -> Self {
        Self::new()
    }
}
590
591#[async_trait]
592impl Vocoder for VocoderManager {
593 async fn vocode(
594 &self,
595 mel: &MelSpectrogram,
596 config: Option<&SynthesisConfig>,
597 ) -> Result<AudioBuffer> {
598 let vocoder = self.get_default_vocoder()?;
599 vocoder.vocode(mel, config).await
600 }
601
602 async fn vocode_stream(
603 &self,
604 mel_stream: Box<dyn Stream<Item = MelSpectrogram> + Send + Unpin>,
605 config: Option<&SynthesisConfig>,
606 ) -> Result<Box<dyn Stream<Item = Result<AudioBuffer>> + Send + Unpin>> {
607 let vocoder = self.get_default_vocoder()?;
608 vocoder.vocode_stream(mel_stream, config).await
609 }
610
611 async fn vocode_batch(
612 &self,
613 mels: &[MelSpectrogram],
614 configs: Option<&[SynthesisConfig]>,
615 ) -> Result<Vec<AudioBuffer>> {
616 let vocoder = self.get_default_vocoder()?;
617 vocoder.vocode_batch(mels, configs).await
618 }
619
620 fn metadata(&self) -> VocoderMetadata {
621 if let Ok(vocoder) = self.get_default_vocoder() {
622 vocoder.metadata()
623 } else {
624 VocoderMetadata {
625 name: "Vocoder Manager".to_string(),
626 version: env!("CARGO_PKG_VERSION").to_string(),
627 architecture: "Manager".to_string(),
628 sample_rate: 22050,
629 mel_channels: 80,
630 latency_ms: 0.0,
631 quality_score: 0.0,
632 }
633 }
634 }
635
636 fn supports(&self, feature: VocoderFeature) -> bool {
637 if let Ok(vocoder) = self.get_default_vocoder() {
638 vocoder.supports(feature)
639 } else {
640 false
641 }
642 }
643}
644
/// Placeholder vocoder for tests and pipeline wiring checks; the
/// `Vocoder` impl synthesizes a fixed sine tone instead of running a
/// neural model.
pub struct DummyVocoder {
    // Output sample rate in Hz.
    sample_rate: u32,
    // Mel channel count reported in metadata.
    mel_channels: u32,
}

impl DummyVocoder {
    /// Creates a dummy vocoder with the standard 22.05 kHz / 80-mel setup.
    pub fn new() -> Self {
        Self::with_config(22050, 80)
    }

    /// Creates a dummy vocoder with an explicit sample rate and mel count.
    pub fn with_config(sample_rate: u32, mel_channels: u32) -> Self {
        Self {
            sample_rate,
            mel_channels,
        }
    }
}
670
impl Default for DummyVocoder {
    /// Equivalent to [`DummyVocoder::new`]: 22.05 kHz, 80 mel channels.
    fn default() -> Self {
        Self::new()
    }
}
676
677#[async_trait]
678impl Vocoder for DummyVocoder {
679 async fn vocode(
680 &self,
681 mel: &MelSpectrogram,
682 _config: Option<&SynthesisConfig>,
683 ) -> Result<AudioBuffer> {
684 let duration = mel.duration();
686 let frequency = 440.0; let audio = AudioBuffer::sine_wave(frequency, duration, self.sample_rate, 0.5);
688
689 tracing::debug!(
690 "DummyVocoder: Generated {:.2}s audio from {}x{} mel",
691 duration,
692 mel.n_mels,
693 mel.n_frames
694 );
695 Ok(audio)
696 }
697
698 async fn vocode_stream(
699 &self,
700 mel_stream: Box<dyn Stream<Item = MelSpectrogram> + Send + Unpin>,
701 config: Option<&SynthesisConfig>,
702 ) -> Result<Box<dyn Stream<Item = Result<AudioBuffer>> + Send + Unpin>> {
703 use futures::stream;
704 use futures::StreamExt;
705
706 let mels: Vec<MelSpectrogram> = mel_stream.collect().await;
708 let configs = config.map(|c| vec![c.clone(); mels.len()]);
709
710 let results = self.vocode_batch(&mels, configs.as_deref()).await?;
711 let stream = stream::iter(results.into_iter().map(Ok));
712
713 Ok(Box::new(stream))
714 }
715
716 async fn vocode_batch(
717 &self,
718 mels: &[MelSpectrogram],
719 configs: Option<&[SynthesisConfig]>,
720 ) -> Result<Vec<AudioBuffer>> {
721 let mut results = Vec::new();
722 for (i, mel) in mels.iter().enumerate() {
723 let config = configs.and_then(|c| c.get(i));
724 results.push(self.vocode(mel, config).await?);
725 }
726 Ok(results)
727 }
728
729 fn metadata(&self) -> VocoderMetadata {
730 VocoderMetadata {
731 name: "Dummy Vocoder".to_string(),
732 version: "0.1.0".to_string(),
733 architecture: "Sine Wave".to_string(),
734 sample_rate: self.sample_rate,
735 mel_channels: self.mel_channels,
736 latency_ms: 10.0,
737 quality_score: 2.0, }
739 }
740
741 fn supports(&self, feature: VocoderFeature) -> bool {
742 matches!(feature, VocoderFeature::BatchProcessing)
743 }
744}
745
#[cfg(test)]
mod tests {
    use super::*;

    /// Registers a DummyVocoder with the manager and checks delegation
    /// of `vocode` plus name listing.
    #[tokio::test]
    async fn test_vocoder_manager() {
        let mut manager = VocoderManager::new();

        // The first vocoder added becomes the default automatically.
        manager.add_vocoder("dummy".to_string(), Box::new(DummyVocoder::new()));

        // 80 mel bands x 100 frames; hop 256 @ 22.05 kHz ≈ 1.16 s of audio.
        let mel_data = vec![vec![0.5; 100]; 80];
        let mel = MelSpectrogram::new(mel_data, 22050, 256);

        let audio = manager.vocode(&mel, None).await.unwrap();
        assert!(audio.duration() > 0.0);
        assert_eq!(audio.sample_rate(), 22050);

        let vocoders = manager.list_vocoders();
        assert!(vocoders.contains(&"dummy"));
    }

    /// Exercises DummyVocoder directly: single vocode, metadata,
    /// batch processing, and feature flags.
    #[tokio::test]
    async fn test_dummy_vocoder() {
        let vocoder = DummyVocoder::new();

        // 80 mel bands x 50 frames.
        let mel_data = vec![vec![0.5; 50]; 80];
        let mel = MelSpectrogram::new(mel_data, 22050, 256);

        let audio = vocoder.vocode(&mel, None).await.unwrap();

        assert!(audio.duration() > 0.0);
        assert_eq!(audio.sample_rate(), 22050);
        assert!(!audio.is_empty());

        let metadata = vocoder.metadata();
        assert_eq!(metadata.name, "Dummy Vocoder");
        assert_eq!(metadata.sample_rate, 22050);
        assert_eq!(metadata.mel_channels, 80);

        // Batch keeps a 1:1 input/output correspondence.
        let mels = vec![mel.clone(), mel.clone()];
        let results = vocoder.vocode_batch(&mels, None).await.unwrap();
        assert_eq!(results.len(), 2);

        for audio in results {
            assert!(audio.duration() > 0.0);
            assert_eq!(audio.sample_rate(), 22050);
        }

        // DummyVocoder advertises batch support only.
        assert!(vocoder.supports(VocoderFeature::BatchProcessing));
        assert!(!vocoder.supports(VocoderFeature::StreamingInference));
    }
}