// voirs_conversion/types.rs
1//! Core types for voice conversion
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::str::FromStr;
6use std::time::{Duration, SystemTime};
7
/// Voice conversion types.
///
/// Identifies the kind of transformation applied to the source voice.
/// String round-tripping is provided by [`ConversionType::as_str`] and
/// [`ConversionType::parse_type`]; unknown identifiers map to `Custom`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum ConversionType {
    /// Convert to a specific speaker
    #[default]
    SpeakerConversion,
    /// Transform age characteristics
    AgeTransformation,
    /// Transform gender characteristics
    GenderTransformation,
    /// General pitch shifting
    PitchShift,
    /// Speed/tempo transformation
    SpeedTransformation,
    /// Voice morphing between multiple sources
    VoiceMorphing,
    /// Emotional transformation
    EmotionalTransformation,
    /// Zero-shot conversion to unseen target voices
    ZeroShotConversion,
    /// Pass through with minimal processing (for testing)
    PassThrough,
    /// Custom transformation identified by a free-form name
    Custom(String),
}
33
34impl ConversionType {
35    /// Get the string representation
36    pub fn as_str(&self) -> &str {
37        match self {
38            ConversionType::SpeakerConversion => "speaker_conversion",
39            ConversionType::AgeTransformation => "age_transformation",
40            ConversionType::GenderTransformation => "gender_transformation",
41            ConversionType::PitchShift => "pitch_shift",
42            ConversionType::SpeedTransformation => "speed_transformation",
43            ConversionType::VoiceMorphing => "voice_morphing",
44            ConversionType::EmotionalTransformation => "emotional_transformation",
45            ConversionType::ZeroShotConversion => "zero_shot_conversion",
46            ConversionType::PassThrough => "pass_through",
47            ConversionType::Custom(name) => name,
48        }
49    }
50
51    /// Parse from string
52    pub fn parse_type(s: &str) -> Option<Self> {
53        match s.to_lowercase().as_str() {
54            "speaker_conversion" => Some(ConversionType::SpeakerConversion),
55            "age_transformation" => Some(ConversionType::AgeTransformation),
56            "gender_transformation" => Some(ConversionType::GenderTransformation),
57            "pitch_shift" => Some(ConversionType::PitchShift),
58            "speed_transformation" => Some(ConversionType::SpeedTransformation),
59            "voice_morphing" => Some(ConversionType::VoiceMorphing),
60            "emotional_transformation" => Some(ConversionType::EmotionalTransformation),
61            "zero_shot_conversion" => Some(ConversionType::ZeroShotConversion),
62            "pass_through" => Some(ConversionType::PassThrough),
63            _ => Some(ConversionType::Custom(s.to_string())),
64        }
65    }
66
67    /// Check if conversion type supports real-time processing
68    pub fn supports_realtime(&self) -> bool {
69        match self {
70            ConversionType::PitchShift => true,
71            ConversionType::SpeedTransformation => true,
72            ConversionType::SpeakerConversion => true,
73            ConversionType::VoiceMorphing => false, // Requires complex processing
74            ConversionType::AgeTransformation => true,
75            ConversionType::GenderTransformation => true,
76            ConversionType::EmotionalTransformation => true,
77            ConversionType::ZeroShotConversion => false, // Requires complex analysis of unseen voices
78            ConversionType::PassThrough => true,         // Fastest possible processing
79            ConversionType::Custom(_) => false,          // Conservative default
80        }
81    }
82}
83
impl FromStr for ConversionType {
    type Err = String;

    /// Parse a conversion type from its string identifier.
    ///
    /// NOTE(review): `parse_type` currently maps any unknown string to
    /// `Custom`, so it never returns `None` and this impl never actually
    /// returns `Err` — confirm whether strict parsing was intended.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::parse_type(s).ok_or_else(|| format!("Unknown conversion type: {s}"))
    }
}
91
/// Voice characteristics for conversion targets.
///
/// Aggregates pitch, timing, spectral and quality parameters plus
/// optional demographic hints. Construct via [`VoiceCharacteristics::for_age`]
/// or [`VoiceCharacteristics::for_gender`] for preset profiles.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct VoiceCharacteristics {
    /// Fundamental frequency parameters
    pub pitch: PitchCharacteristics,
    /// Temporal characteristics
    pub timing: TimingCharacteristics,
    /// Spectral characteristics
    pub spectral: SpectralCharacteristics,
    /// Voice quality parameters
    pub quality: QualityCharacteristics,
    /// Age group, if known
    pub age_group: Option<AgeGroup>,
    /// Gender, if known
    pub gender: Option<Gender>,
    /// Accent/dialect identifier, if known
    pub accent: Option<String>,
    /// Custom named characteristics (free-form key → value)
    pub custom_params: HashMap<String, f32>,
}
112
113impl VoiceCharacteristics {
114    /// Create new voice characteristics
115    pub fn new() -> Self {
116        Self::default()
117    }
118
119    /// Create characteristics for specific age
120    pub fn for_age(age_group: AgeGroup) -> Self {
121        let mut chars = Self::new();
122        chars.age_group = Some(age_group);
123
124        // Adjust characteristics based on age
125        match age_group {
126            AgeGroup::Child => {
127                chars.pitch.mean_f0 = 250.0; // Higher pitch for children
128                chars.timing.speaking_rate = 1.1; // Slightly faster
129                chars.quality.breathiness = 0.2;
130            }
131            AgeGroup::Teen => {
132                chars.pitch.mean_f0 = 200.0;
133                chars.timing.speaking_rate = 1.2; // Faster speech
134                chars.quality.roughness = 0.1;
135            }
136            AgeGroup::YoungAdult => {
137                chars.pitch.mean_f0 = 150.0;
138                chars.timing.speaking_rate = 1.0; // Normal rate
139            }
140            AgeGroup::Adult => {
141                chars.pitch.mean_f0 = 145.0;
142                chars.timing.speaking_rate = 0.98; // Slightly slower than young adult
143                chars.quality.stability = 0.85;
144            }
145            AgeGroup::MiddleAged => {
146                chars.pitch.mean_f0 = 140.0;
147                chars.timing.speaking_rate = 0.95; // Slightly slower
148                chars.quality.stability = 0.9;
149            }
150            AgeGroup::Senior => {
151                chars.pitch.mean_f0 = 130.0;
152                chars.timing.speaking_rate = 0.85; // Slower speech
153                chars.quality.breathiness = 0.3;
154                chars.quality.roughness = 0.2;
155            }
156            AgeGroup::Unknown => {}
157        }
158
159        chars
160    }
161
162    /// Create characteristics for specific gender
163    pub fn for_gender(gender: Gender) -> Self {
164        let mut chars = Self::new();
165        chars.gender = Some(gender);
166
167        // Adjust characteristics based on gender
168        match gender {
169            Gender::Male => {
170                chars.pitch.mean_f0 = 120.0; // Lower pitch
171                chars.spectral.formant_shift = -0.1; // Lower formants
172                chars.quality.roughness = 0.15;
173            }
174            Gender::Female => {
175                chars.pitch.mean_f0 = 200.0; // Higher pitch
176                chars.spectral.formant_shift = 0.1; // Higher formants
177                chars.quality.breathiness = 0.1;
178            }
179            Gender::NonBinary | Gender::Other | Gender::Unknown => {
180                chars.pitch.mean_f0 = 160.0; // Neutral pitch
181            }
182        }
183
184        chars
185    }
186
187    /// Interpolate between two voice characteristics
188    pub fn interpolate(&self, other: &Self, factor: f32) -> Self {
189        let t = factor.clamp(0.0, 1.0);
190        let inv_t = 1.0 - t;
191
192        let mut result = self.clone();
193
194        // Interpolate pitch characteristics
195        result.pitch.mean_f0 = self.pitch.mean_f0 * inv_t + other.pitch.mean_f0 * t;
196        result.pitch.range = self.pitch.range * inv_t + other.pitch.range * t;
197        result.pitch.jitter = self.pitch.jitter * inv_t + other.pitch.jitter * t;
198
199        // Interpolate timing characteristics
200        result.timing.speaking_rate =
201            self.timing.speaking_rate * inv_t + other.timing.speaking_rate * t;
202        result.timing.pause_duration =
203            self.timing.pause_duration * inv_t + other.timing.pause_duration * t;
204
205        // Interpolate spectral characteristics
206        result.spectral.formant_shift =
207            self.spectral.formant_shift * inv_t + other.spectral.formant_shift * t;
208        result.spectral.brightness =
209            self.spectral.brightness * inv_t + other.spectral.brightness * t;
210
211        // Interpolate quality characteristics
212        result.quality.breathiness =
213            self.quality.breathiness * inv_t + other.quality.breathiness * t;
214        result.quality.roughness = self.quality.roughness * inv_t + other.quality.roughness * t;
215        result.quality.stability = self.quality.stability * inv_t + other.quality.stability * t;
216
217        // Interpolate custom parameters
218        for (key, &value) in &self.custom_params {
219            if let Some(&other_value) = other.custom_params.get(key) {
220                result
221                    .custom_params
222                    .insert(key.clone(), value * inv_t + other_value * t);
223            }
224        }
225
226        result
227    }
228}
229
/// Pitch-related characteristics.
///
/// Defaults (see the `Default` impl) model a neutral adult voice.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct PitchCharacteristics {
    /// Mean fundamental frequency (Hz)
    pub mean_f0: f32,
    /// Pitch range (semitones)
    pub range: f32,
    /// Pitch jitter (0.0 to 1.0)
    pub jitter: f32,
    /// Pitch stability (0.0 to 1.0)
    pub stability: f32,
}
242
243impl Default for PitchCharacteristics {
244    fn default() -> Self {
245        Self {
246            mean_f0: 150.0,
247            range: 12.0, // One octave
248            jitter: 0.1,
249            stability: 0.8,
250        }
251    }
252}
253
/// Timing-related characteristics.
///
/// Rates and durations are relative scale factors (1.0 = unmodified).
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct TimingCharacteristics {
    /// Speaking rate (relative to normal; 1.0 = unchanged)
    pub speaking_rate: f32,
    /// Pause duration scale (1.0 = unchanged)
    pub pause_duration: f32,
    /// Rhythm regularity (0.0 to 1.0)
    pub rhythm_regularity: f32,
}
264
265impl Default for TimingCharacteristics {
266    fn default() -> Self {
267        Self {
268            speaking_rate: 1.0,
269            pause_duration: 1.0,
270            rhythm_regularity: 0.7,
271        }
272    }
273}
274
/// Spectral characteristics.
///
/// Shifts are relative (0.0 = unchanged); see the `Default` impl.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct SpectralCharacteristics {
    /// Formant frequency shift (relative; 0.0 = unchanged)
    pub formant_shift: f32,
    /// Spectral brightness (-1.0 to 1.0)
    pub brightness: f32,
    /// Spectral tilt
    pub spectral_tilt: f32,
    /// Harmonicity
    pub harmonicity: f32,
}
287
288impl Default for SpectralCharacteristics {
289    fn default() -> Self {
290        Self {
291            formant_shift: 0.0,
292            brightness: 0.0,
293            spectral_tilt: 0.0,
294            harmonicity: 0.8,
295        }
296    }
297}
298
/// Voice quality characteristics.
///
/// All fields are normalized to the [0.0, 1.0] range.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct QualityCharacteristics {
    /// Breathiness (0.0 to 1.0)
    pub breathiness: f32,
    /// Roughness (0.0 to 1.0)
    pub roughness: f32,
    /// Voice stability (0.0 to 1.0)
    pub stability: f32,
    /// Resonance quality (0.0 to 1.0)
    pub resonance: f32,
}
311
312impl Default for QualityCharacteristics {
313    fn default() -> Self {
314        Self {
315            breathiness: 0.1,
316            roughness: 0.1,
317            stability: 0.8,
318            resonance: 0.7,
319        }
320    }
321}
322
/// Age group classification.
///
/// NOTE(review): the documented ranges overlap — `Adult` (20-55) spans
/// both `YoungAdult` (20-35) and `MiddleAged` (36-55). `Adult` appears
/// to serve as a broader catch-all category; confirm the intended
/// taxonomy with the classification code that produces these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AgeGroup {
    /// Child (under 12)
    Child,
    /// Teenager (12-19)
    Teen,
    /// Young adult (20-35)
    YoungAdult,
    /// Adult (20-55; broad category overlapping YoungAdult/MiddleAged)
    Adult,
    /// Middle-aged (36-55)
    MiddleAged,
    /// Senior (55+)
    Senior,
    /// Unknown/unclassified
    #[default]
    Unknown,
}
342
/// Gender classification.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum Gender {
    /// Male voice
    Male,
    /// Female voice
    Female,
    /// Non-binary voice
    NonBinary,
    /// Other gender identity (distinct from `NonBinary`)
    Other,
    /// Unknown/unclassified
    #[default]
    Unknown,
}
358
/// Conversion target specification.
///
/// Built via [`ConversionTarget::new`] plus the `with_*` builder methods.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConversionTarget {
    /// Target voice characteristics
    pub characteristics: VoiceCharacteristics,
    /// Target speaker ID (if applicable)
    pub speaker_id: Option<String>,
    /// Reference audio samples (if available)
    pub reference_samples: Vec<AudioSample>,
    /// Conversion strength (0.0 to 1.0; clamped by the builder)
    pub strength: f32,
    /// How much of the original characteristics to preserve (0.0 to 1.0)
    pub preserve_original: f32,
}
373
374impl ConversionTarget {
375    /// Create new conversion target
376    pub fn new(characteristics: VoiceCharacteristics) -> Self {
377        Self {
378            characteristics,
379            speaker_id: None,
380            reference_samples: Vec::new(),
381            strength: 1.0,
382            preserve_original: 0.0,
383        }
384    }
385
386    /// Set target speaker
387    pub fn with_speaker_id(mut self, speaker_id: String) -> Self {
388        self.speaker_id = Some(speaker_id);
389        self
390    }
391
392    /// Add reference sample
393    pub fn with_reference_sample(mut self, sample: AudioSample) -> Self {
394        self.reference_samples.push(sample);
395        self
396    }
397
398    /// Set conversion strength
399    pub fn with_strength(mut self, strength: f32) -> Self {
400        self.strength = strength.clamp(0.0, 1.0);
401        self
402    }
403
404    /// Set preservation amount
405    pub fn with_preservation(mut self, preserve: f32) -> Self {
406        self.preserve_original = preserve.clamp(0.0, 1.0);
407        self
408    }
409}
410
/// Audio sample used as conversion reference material.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AudioSample {
    /// Sample ID
    pub id: String,
    /// Audio data (PCM samples, mono f32)
    pub audio: Vec<f32>,
    /// Sample rate in Hz
    pub sample_rate: u32,
    /// Duration in seconds (derived from `audio.len()` and `sample_rate`)
    pub duration: f32,
    /// Free-form sample metadata (key → value)
    pub metadata: HashMap<String, String>,
}
425
426impl AudioSample {
427    /// Create new audio sample
428    pub fn new(id: String, audio: Vec<f32>, sample_rate: u32) -> Self {
429        let duration = audio.len() as f32 / sample_rate as f32;
430        Self {
431            id,
432            audio,
433            sample_rate,
434            duration,
435            metadata: HashMap::new(),
436        }
437    }
438
439    /// Add metadata
440    pub fn with_metadata(mut self, key: String, value: String) -> Self {
441        self.metadata.insert(key, value);
442        self
443    }
444}
445
/// Voice conversion request.
///
/// Validate with [`ConversionRequest::validate`] before processing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversionRequest {
    /// Request ID
    pub id: String,
    /// Source audio data (PCM samples)
    pub source_audio: Vec<f32>,
    /// Source sample rate in Hz
    pub source_sample_rate: u32,
    /// Conversion type
    pub conversion_type: ConversionType,
    /// Conversion target
    pub target: ConversionTarget,
    /// Real-time processing flag (must be supported by `conversion_type`)
    pub realtime: bool,
    /// Quality level (0.0 to 1.0; clamped by the builder)
    pub quality_level: f32,
    /// Additional named processing parameters
    pub parameters: HashMap<String, f32>,
    /// Request creation timestamp
    pub timestamp: SystemTime,
}
468
469impl ConversionRequest {
470    /// Create new conversion request
471    pub fn new(
472        id: String,
473        source_audio: Vec<f32>,
474        source_sample_rate: u32,
475        conversion_type: ConversionType,
476        target: ConversionTarget,
477    ) -> Self {
478        Self {
479            id,
480            source_audio,
481            source_sample_rate,
482            conversion_type,
483            target,
484            realtime: false,
485            quality_level: 0.8,
486            parameters: HashMap::new(),
487            timestamp: SystemTime::now(),
488        }
489    }
490
491    /// Enable real-time processing
492    pub fn with_realtime(mut self, realtime: bool) -> Self {
493        self.realtime = realtime;
494        self
495    }
496
497    /// Set quality level
498    pub fn with_quality_level(mut self, level: f32) -> Self {
499        self.quality_level = level.clamp(0.0, 1.0);
500        self
501    }
502
503    /// Add parameter
504    pub fn with_parameter(mut self, key: String, value: f32) -> Self {
505        self.parameters.insert(key, value);
506        self
507    }
508
509    /// Validate the request
510    pub fn validate(&self) -> crate::Result<()> {
511        if self.source_audio.is_empty() {
512            return Err(crate::Error::Validation {
513                message: "Source audio cannot be empty".to_string(),
514                field: Some("source_audio".to_string()),
515                expected: Some("Non-empty audio data".to_string()),
516                actual: Some("Empty audio data".to_string()),
517                context: None,
518                recovery_suggestions: Box::new(vec![
519                    "Provide valid audio data".to_string(),
520                    "Check audio file loading".to_string(),
521                ]),
522            });
523        }
524
525        if self.source_sample_rate == 0 {
526            return Err(crate::Error::Validation {
527                message: "Source sample rate must be positive".to_string(),
528                field: Some("source_sample_rate".to_string()),
529                expected: Some("Positive sample rate".to_string()),
530                actual: Some(format!("{}", self.source_sample_rate)),
531                context: None,
532                recovery_suggestions: Box::new(vec![
533                    "Set sample rate to a positive value (e.g., 44100, 48000)".to_string(),
534                    "Check audio metadata".to_string(),
535                ]),
536            });
537        }
538
539        if self.realtime && !self.conversion_type.supports_realtime() {
540            return Err(crate::Error::Validation {
541                message: format!(
542                    "Conversion type {:?} does not support real-time processing",
543                    self.conversion_type
544                ),
545                field: Some("realtime".to_string()),
546                expected: Some("False for non-realtime conversion types".to_string()),
547                actual: Some("True".to_string()),
548                context: None,
549                recovery_suggestions: Box::new(vec![
550                    "Set realtime to false".to_string(),
551                    "Use a different conversion type that supports real-time processing"
552                        .to_string(),
553                ]),
554            });
555        }
556
557        Ok(())
558    }
559
560    /// Get source duration in seconds
561    pub fn source_duration(&self) -> f32 {
562        self.source_audio.len() as f32 / self.source_sample_rate as f32
563    }
564}
565
/// Voice conversion result.
///
/// Created via [`ConversionResult::success`] or
/// [`ConversionResult::failure`]; optional metrics are attached with
/// the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversionResult {
    /// Request ID this result corresponds to
    pub request_id: String,
    /// Converted audio data (empty for failed results)
    pub converted_audio: Vec<f32>,
    /// Output sample rate in Hz (0 for failed results)
    pub output_sample_rate: u32,
    /// Legacy conversion quality metrics (for compatibility)
    pub quality_metrics: HashMap<String, f32>,
    /// Comprehensive artifact detection results
    pub artifacts: Option<DetectedArtifacts>,
    /// Objective quality assessment results
    pub objective_quality: Option<ObjectiveQualityMetrics>,
    /// Processing time
    pub processing_time: Duration,
    /// Conversion type used
    pub conversion_type: ConversionType,
    /// Success status
    pub success: bool,
    /// Error message if failed
    pub error_message: Option<String>,
    /// Result creation timestamp
    pub timestamp: SystemTime,
}
592
/// Detected artifacts in a conversion result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedArtifacts {
    /// Overall artifact score (0.0 = clean, 1.0 = heavily artifacted)
    pub overall_score: f32,
    /// Individual artifact types and their scores (type name → score)
    pub artifact_types: HashMap<String, f32>,
    /// Number of detected artifact locations
    pub artifact_count: usize,
    /// Quality assessment derived from the detected artifacts
    pub quality_assessment: QualityAssessment,
}
605
/// Quality assessment for a conversion result.
///
/// All scores are normalized to [0.0, 1.0] (higher is better).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityAssessment {
    /// Overall quality score (0.0 to 1.0)
    pub overall_quality: f32,
    /// Naturalness score (0.0 to 1.0)
    pub naturalness: f32,
    /// Clarity score (0.0 to 1.0)
    pub clarity: f32,
    /// Consistency score (0.0 to 1.0)
    pub consistency: f32,
    /// Recommended quality adjustments, if any
    pub recommended_adjustments: Vec<QualityAdjustment>,
}
620
/// Recommended quality adjustment.
///
/// NOTE(review): `adjustment_type` is a free-form string here; if the
/// set of adjustment types is closed, an enum would be safer — confirm
/// against the code that produces these.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityAdjustment {
    /// Type of adjustment (free-form identifier)
    pub adjustment_type: String,
    /// Recommended strength (0.0 to 1.0)
    pub strength: f32,
    /// Expected quality improvement from applying the adjustment
    pub expected_improvement: f32,
}
631
/// Objective quality metrics for a conversion result.
///
/// Scores are normalized to [0.0, 1.0] except the SNR fields, which
/// are presumably in dB — confirm with the metric computation code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObjectiveQualityMetrics {
    /// Overall quality score (0.0 to 1.0)
    pub overall_score: f32,
    /// Spectral similarity score
    pub spectral_similarity: f32,
    /// Temporal consistency score
    pub temporal_consistency: f32,
    /// Prosodic preservation score
    pub prosodic_preservation: f32,
    /// Naturalness score
    pub naturalness: f32,
    /// Perceptual quality score
    pub perceptual_quality: f32,
    /// Signal-to-noise ratio estimate
    pub snr_estimate: f32,
    /// Segmental SNR
    pub segmental_snr: f32,
}
652
653impl ConversionResult {
654    /// Create successful result
655    pub fn success(
656        request_id: String,
657        converted_audio: Vec<f32>,
658        output_sample_rate: u32,
659        processing_time: Duration,
660        conversion_type: ConversionType,
661    ) -> Self {
662        Self {
663            request_id,
664            converted_audio,
665            output_sample_rate,
666            quality_metrics: HashMap::new(),
667            artifacts: None,
668            objective_quality: None,
669            processing_time,
670            conversion_type,
671            success: true,
672            error_message: None,
673            timestamp: SystemTime::now(),
674        }
675    }
676
677    /// Create failed result
678    pub fn failure(
679        request_id: String,
680        error_message: String,
681        processing_time: Duration,
682        conversion_type: ConversionType,
683    ) -> Self {
684        Self {
685            request_id,
686            converted_audio: Vec::new(),
687            output_sample_rate: 0,
688            quality_metrics: HashMap::new(),
689            artifacts: None,
690            objective_quality: None,
691            processing_time,
692            conversion_type,
693            success: false,
694            error_message: Some(error_message),
695            timestamp: SystemTime::now(),
696        }
697    }
698
699    /// Add quality metric
700    pub fn with_quality_metric(mut self, name: String, value: f32) -> Self {
701        self.quality_metrics.insert(name, value);
702        self
703    }
704
705    /// Set artifact detection results
706    pub fn with_artifacts(mut self, artifacts: DetectedArtifacts) -> Self {
707        self.artifacts = Some(artifacts);
708        self
709    }
710
711    /// Set objective quality metrics
712    pub fn with_objective_quality(mut self, quality: ObjectiveQualityMetrics) -> Self {
713        self.objective_quality = Some(quality);
714        self
715    }
716
717    /// Get output duration in seconds
718    pub fn output_duration(&self) -> f32 {
719        if self.output_sample_rate == 0 {
720            return 0.0;
721        }
722        self.converted_audio.len() as f32 / self.output_sample_rate as f32
723    }
724}
725
#[cfg(test)]
mod tests {
    use super::*;

    // Realtime support flags and string round-tripping of ConversionType.
    #[test]
    fn test_conversion_type_properties() {
        assert!(ConversionType::PitchShift.supports_realtime());
        assert!(ConversionType::SpeakerConversion.supports_realtime());
        assert!(!ConversionType::VoiceMorphing.supports_realtime());

        assert_eq!(ConversionType::PitchShift.as_str(), "pitch_shift");
        assert_eq!(
            ConversionType::from_str("pitch_shift").ok(),
            Some(ConversionType::PitchShift)
        );
    }

    // Age presets: children should have higher pitch and faster speech
    // than seniors.
    #[test]
    fn test_voice_characteristics_age() {
        let child_chars = VoiceCharacteristics::for_age(AgeGroup::Child);
        let senior_chars = VoiceCharacteristics::for_age(AgeGroup::Senior);

        assert!(child_chars.pitch.mean_f0 > senior_chars.pitch.mean_f0);
        assert!(child_chars.timing.speaking_rate > senior_chars.timing.speaking_rate);
    }

    // Gender presets: male voices should have lower F0 and formants
    // than female voices.
    #[test]
    fn test_voice_characteristics_gender() {
        let male_chars = VoiceCharacteristics::for_gender(Gender::Male);
        let female_chars = VoiceCharacteristics::for_gender(Gender::Female);

        assert!(male_chars.pitch.mean_f0 < female_chars.pitch.mean_f0);
        assert!(male_chars.spectral.formant_shift < female_chars.spectral.formant_shift);
    }

    // Interpolation at factor 0.5 should yield the arithmetic mean of
    // the interpolated fields.
    #[test]
    fn test_voice_characteristics_interpolation() {
        let chars1 = VoiceCharacteristics::for_gender(Gender::Male);
        let chars2 = VoiceCharacteristics::for_gender(Gender::Female);

        let interpolated = chars1.interpolate(&chars2, 0.5);

        let expected_f0 = (chars1.pitch.mean_f0 + chars2.pitch.mean_f0) / 2.0;
        assert!((interpolated.pitch.mean_f0 - expected_f0).abs() < 0.001);
    }

    // Builder methods should store speaker id and clamp-through values.
    #[test]
    fn test_conversion_target() {
        let chars = VoiceCharacteristics::for_age(AgeGroup::YoungAdult);
        let target = ConversionTarget::new(chars)
            .with_speaker_id("speaker123".to_string())
            .with_strength(0.8)
            .with_preservation(0.2);

        assert_eq!(target.speaker_id, Some("speaker123".to_string()));
        assert_eq!(target.strength, 0.8);
        assert_eq!(target.preserve_original, 0.2);
    }

    // Duration derivation and metadata attachment for AudioSample.
    #[test]
    fn test_audio_sample() {
        let audio = vec![0.1, -0.2, 0.3, -0.4];
        let sample = AudioSample::new("test".to_string(), audio.clone(), 16000)
            .with_metadata("quality".to_string(), "high".to_string());

        assert_eq!(sample.audio, audio);
        assert_eq!(sample.sample_rate, 16000);
        assert_eq!(sample.duration, 4.0 / 16000.0);
        assert_eq!(sample.metadata.get("quality"), Some(&"high".to_string()));
    }

    // validate() should accept a well-formed request and reject empty
    // audio and unsupported realtime combinations.
    #[test]
    fn test_conversion_request_validation() {
        let chars = VoiceCharacteristics::default();
        let target = ConversionTarget::new(chars);

        // Valid request
        let request = ConversionRequest::new(
            "req1".to_string(),
            vec![0.1, 0.2, 0.3],
            16000,
            ConversionType::PitchShift,
            target.clone(),
        );
        assert!(request.validate().is_ok());

        // Invalid - empty audio
        let invalid_request = ConversionRequest::new(
            "req2".to_string(),
            vec![],
            16000,
            ConversionType::PitchShift,
            target.clone(),
        );
        assert!(invalid_request.validate().is_err());

        // Invalid - realtime not supported (VoiceMorphing is offline-only)
        let realtime_request = ConversionRequest::new(
            "req3".to_string(),
            vec![0.1, 0.2],
            16000,
            ConversionType::VoiceMorphing,
            target,
        )
        .with_realtime(true);
        assert!(realtime_request.validate().is_err());
    }

    // Successful results report success and compute output duration.
    #[test]
    fn test_conversion_result() {
        let result = ConversionResult::success(
            "req1".to_string(),
            vec![0.1, 0.2, 0.3, 0.4],
            22050,
            Duration::from_millis(100),
            ConversionType::PitchShift,
        );

        assert!(result.success);
        assert_eq!(result.output_duration(), 4.0 / 22050.0);
    }
}