Skip to main content

voirs_g2p/
lib.rs

1//! # VoiRS G2P (Grapheme-to-Phoneme) Conversion
2//!
3//! Converts text to phonemes using various backends including rule-based,
4//! neural, and hybrid approaches for multiple languages.
5
6// Allow pedantic lints that are acceptable for audio/DSP processing code
7#![allow(clippy::cast_precision_loss)] // Acceptable for audio sample conversions
8#![allow(clippy::cast_possible_truncation)] // Controlled truncation in audio processing
9#![allow(clippy::cast_sign_loss)] // Intentional in index calculations
10#![allow(clippy::missing_errors_doc)] // Many internal functions with self-documenting error types
11#![allow(clippy::missing_panics_doc)] // Panics are documented where relevant
12#![allow(clippy::unused_self)] // Some trait implementations require &self for consistency
13#![allow(clippy::must_use_candidate)] // Not all return values need must_use annotation
14#![allow(clippy::doc_markdown)] // Technical terms don't all need backticks
15#![allow(clippy::unnecessary_wraps)] // Result wrappers maintained for API consistency
16#![allow(clippy::float_cmp)] // Exact float comparisons are intentional in some contexts
17#![allow(clippy::match_same_arms)] // Pattern matching clarity sometimes requires duplication
18#![allow(clippy::module_name_repetitions)] // Type names often repeat module names
19#![allow(clippy::struct_excessive_bools)] // Config structs naturally have many boolean flags
20#![allow(clippy::too_many_lines)] // Some functions are inherently complex
21#![allow(clippy::needless_pass_by_value)] // Some functions designed for ownership transfer
22#![allow(clippy::similar_names)] // Many similar variable names in algorithms
23#![allow(clippy::unused_async)] // Public API functions may need async for consistency
24#![allow(clippy::needless_range_loop)] // Range loops sometimes clearer than iterators
25#![allow(clippy::uninlined_format_args)] // Explicit argument names can improve clarity
26#![allow(clippy::manual_clamp)] // Manual clamping sometimes clearer
27#![allow(clippy::return_self_not_must_use)] // Not all builder methods need must_use
28#![allow(clippy::cast_possible_wrap)] // Controlled wrapping in processing code
29#![allow(clippy::cast_lossless)] // Explicit casts preferred for clarity
30#![allow(clippy::wildcard_imports)] // Prelude imports are convenient and standard
31#![allow(clippy::format_push_string)] // Sometimes more readable than alternative
32#![allow(clippy::redundant_closure_for_method_calls)] // Closures sometimes needed for type inference
33#![allow(clippy::too_many_arguments)] // Some functions naturally need many parameters
34#![allow(clippy::field_reassign_with_default)] // Sometimes clearer than builder pattern
35#![allow(clippy::trivially_copy_pass_by_ref)] // API consistency more important
36#![allow(clippy::await_holding_lock)] // Controlled lock holding in async contexts
37
38use async_trait::async_trait;
39use serde::{Deserialize, Serialize};
40use std::collections::HashMap;
41use thiserror::Error;
42
/// Result type for G2P operations
///
/// Shorthand for `std::result::Result<T, G2pError>`, so fallible functions in
/// this crate can write `Result<T>` and report failures as [`G2pError`].
pub type Result<T> = std::result::Result<T, G2pError>;
45
/// G2P-specific error types
///
/// This is the error half of the crate-level `Result` alias. The `Display`
/// text of each variant is generated by `thiserror` from the `#[error(...)]`
/// attribute attached to it.
#[derive(Error, Debug)]
pub enum G2pError {
    /// G2P conversion failed during phoneme generation
    ///
    /// The payload is a free-form description of the failure.
    #[error("G2P conversion failed: {0}")]
    ConversionError(String),

    /// Specified language is not supported by the current backend
    ///
    /// The message renders the language with its `Debug` form (e.g. `EnUs`),
    /// not with `LanguageCode::as_str`.
    #[error("Unsupported language: {0:?}")]
    UnsupportedLanguage(LanguageCode),

    /// Failed to load or initialize a G2P model
    #[error("Model loading failed: {0}")]
    ModelError(String),

    /// Configuration file or parameters are invalid
    #[error("Configuration error: {0}")]
    ConfigError(String),

    /// Input text is malformed or empty
    #[error("Invalid input: {0}")]
    InvalidInput(String),

    /// IO operation failed (file read/write, network, etc.)
    ///
    /// `#[from]` generates `From<std::io::Error>`, so `?` on an IO result
    /// converts into this variant automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// Phoneme validation constraints were violated
    #[error("Phoneme validation failed: {0}")]
    PhonemeValidationError(String),

    /// Backend-specific error occurred during processing
    #[error("Backend error: {backend} - {message}")]
    BackendError {
        /// Name of the backend that encountered the error
        backend: String,
        /// Detailed error message from the backend
        message: String,
    },

    /// Text preprocessing stage failed
    #[error("Preprocessing error: {0}")]
    PreprocessingError(String),

    /// Performance optimization operations failed
    #[error("Performance optimization failed: {0}")]
    OptimizationError(String),
}
94
/// Diagnostic context for G2P conversion issues
///
/// A plain, serializable record describing where in the pipeline a problem
/// occurred and what was being processed at the time. All fields are public.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct G2pDiagnosticContext {
    /// Original input text
    pub input_text: String,
    /// Detected or specified language
    pub language: LanguageCode,
    /// Backend used for conversion
    pub backend: String,
    /// Processing stage where error occurred
    pub stage: ProcessingStage,
    /// Additional context information as free-form key/value pairs
    pub context: HashMap<String, String>,
    /// Timestamp of the error, in whole seconds since the Unix epoch
    /// (this is what `G2pDiagnosticContext::new` stores)
    pub timestamp: u64,
}
111
/// Processing stages for diagnostic context
///
/// Identifies which step was running when a [`G2pDiagnosticContext`] was
/// recorded. The diagnostic report prints the variant using its `Debug` name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ProcessingStage {
    /// Text preprocessing stage
    Preprocessing,
    /// Language detection stage
    LanguageDetection,
    /// Backend selection stage
    BackendSelection,
    /// Phoneme conversion stage
    PhonemeConversion,
    /// Phoneme validation stage
    PhonemeValidation,
    /// Post-processing stage
    PostProcessing,
}
128
129impl G2pDiagnosticContext {
130    /// Create a new diagnostic context
131    pub fn new(
132        input_text: String,
133        language: LanguageCode,
134        backend: String,
135        stage: ProcessingStage,
136    ) -> Self {
137        Self {
138            input_text,
139            language,
140            backend,
141            stage,
142            context: HashMap::new(),
143            timestamp: std::time::SystemTime::now()
144                .duration_since(std::time::UNIX_EPOCH)
145                .unwrap_or_default()
146                .as_secs(),
147        }
148    }
149
150    /// Add context information
151    pub fn add_context(mut self, key: String, value: String) -> Self {
152        self.context.insert(key, value);
153        self
154    }
155
156    /// Get a formatted diagnostic report
157    pub fn format_diagnostic_report(&self) -> String {
158        format!(
159            "G2P Diagnostic Report\n\
160            ====================\n\
161            Input Text: {}\n\
162            Language: {:?}\n\
163            Backend: {}\n\
164            Processing Stage: {:?}\n\
165            Timestamp: {}\n\
166            Context: {:?}",
167            self.input_text, self.language, self.backend, self.stage, self.timestamp, self.context
168        )
169    }
170}
171
/// Language codes supported by VoiRS
///
/// A small `Copy` enum. `Default` yields [`LanguageCode::EnUs`], and the
/// derived `Ord` follows declaration order, which is the order produced when
/// a list of codes is sorted.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
pub enum LanguageCode {
    /// English (US)
    #[default]
    EnUs,
    /// English (UK)
    EnGb,
    /// Japanese
    Ja,
    /// Mandarin Chinese
    ZhCn,
    /// Korean
    Ko,
    /// German
    De,
    /// French
    Fr,
    /// Spanish
    Es,
    /// Italian
    It,
    /// Portuguese
    Pt,
    /// Russian
    Ru,
    /// Arabic
    Ar,
}
203
204impl LanguageCode {
205    /// Get string representation
206    pub fn as_str(&self) -> &'static str {
207        match self {
208            LanguageCode::EnUs => "en-US",
209            LanguageCode::EnGb => "en-GB",
210            LanguageCode::Ja => "ja",
211            LanguageCode::ZhCn => "zh-CN",
212            LanguageCode::Ko => "ko",
213            LanguageCode::De => "de",
214            LanguageCode::Fr => "fr",
215            LanguageCode::Es => "es",
216            LanguageCode::It => "it",
217            LanguageCode::Pt => "pt",
218            LanguageCode::Ru => "ru",
219            LanguageCode::Ar => "ar",
220        }
221    }
222
223    /// Get the full language name
224    pub fn full_name(&self) -> &'static str {
225        match self {
226            LanguageCode::EnUs => "English (United States)",
227            LanguageCode::EnGb => "English (United Kingdom)",
228            LanguageCode::Ja => "Japanese",
229            LanguageCode::ZhCn => "Mandarin Chinese",
230            LanguageCode::Ko => "Korean",
231            LanguageCode::De => "German",
232            LanguageCode::Fr => "French",
233            LanguageCode::Es => "Spanish",
234            LanguageCode::It => "Italian",
235            LanguageCode::Pt => "Portuguese",
236            LanguageCode::Ru => "Russian",
237            LanguageCode::Ar => "Arabic",
238        }
239    }
240
241    /// Check if language uses right-to-left script
242    pub fn is_rtl(&self) -> bool {
243        matches!(self, LanguageCode::Ar)
244    }
245
246    /// Get the script type for this language
247    pub fn script_type(&self) -> &'static str {
248        match self {
249            LanguageCode::EnUs
250            | LanguageCode::EnGb
251            | LanguageCode::De
252            | LanguageCode::Fr
253            | LanguageCode::Es
254            | LanguageCode::It
255            | LanguageCode::Pt => "Latin",
256            LanguageCode::Ru => "Cyrillic",
257            LanguageCode::Ar => "Arabic",
258            LanguageCode::Ja => "Japanese (Kanji/Hiragana/Katakana)",
259            LanguageCode::ZhCn => "Chinese (Simplified)",
260            LanguageCode::Ko => "Hangul",
261        }
262    }
263}
264
/// Syllable position for phonemes
///
/// Describes where a phoneme sits inside its syllable. Phoneme constructors
/// that do not take a position use [`SyllablePosition::Standalone`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SyllablePosition {
    /// Beginning of syllable
    Onset,
    /// Vowel part of syllable
    Nucleus,
    /// End of syllable
    Coda,
    /// End of word/syllable
    Final,
    /// Standalone syllable
    Standalone,
}
279
/// Phonetic features for IPA classification
///
/// Every feature is optional. The categorical features are stored as free-form
/// strings rather than enums, so their spelling is not validated by this type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PhoneticFeatures {
    /// Vowel/consonant classification
    ///
    /// The exact string `"vowel"` is what `Phoneme::is_vowel` tests for.
    pub manner: Option<String>, // vowel, plosive, fricative, nasal, etc.
    /// Place of articulation
    pub place: Option<String>, // bilabial, alveolar, velar, etc.
    /// Voice/voiceless (`Some(true)` = voiced)
    pub voice: Option<bool>,
    /// Front/central/back (for vowels)
    pub frontness: Option<String>,
    /// High/mid/low (for vowels)
    pub height: Option<String>,
    /// Rounded/unrounded (for vowels)
    pub rounded: Option<bool>,
    /// Additional features as free-form key/value pairs
    pub other: HashMap<String, String>,
}
298
299impl PhoneticFeatures {
300    /// Create new empty phonetic features
301    pub fn new() -> Self {
302        Self {
303            manner: None,
304            place: None,
305            voice: None,
306            frontness: None,
307            height: None,
308            rounded: None,
309            other: HashMap::new(),
310        }
311    }
312
313    /// Create vowel features
314    pub fn vowel(height: &str, frontness: &str, rounded: bool) -> Self {
315        Self {
316            manner: Some("vowel".to_string()),
317            place: None,
318            voice: Some(true), // vowels are voiced
319            frontness: Some(frontness.to_string()),
320            height: Some(height.to_string()),
321            rounded: Some(rounded),
322            other: HashMap::new(),
323        }
324    }
325
326    /// Create consonant features
327    pub fn consonant(manner: &str, place: &str, voiced: bool) -> Self {
328        Self {
329            manner: Some(manner.to_string()),
330            place: Some(place.to_string()),
331            voice: Some(voiced),
332            frontness: None,
333            height: None,
334            rounded: None,
335            other: HashMap::new(),
336        }
337    }
338}
339
340impl Default for PhoneticFeatures {
341    fn default() -> Self {
342        Self::new()
343    }
344}
345
/// A phoneme with its symbol and detailed features
///
/// Besides real phonemes, the same type also represents word and syllable
/// boundary markers (see the two `is_*_boundary` flags). All fields are
/// public; the constructors on this type fill unspecified fields with neutral
/// defaults.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Phoneme {
    /// Phoneme symbol (IPA or language-specific)
    pub symbol: String,
    /// IPA symbol if different from main symbol
    pub ipa_symbol: Option<String>,
    /// Language-specific notation (ARPAbet, SAMPA, etc.)
    pub language_notation: Option<String>,
    /// Stress level: 0=none, 1=primary, 2=secondary, 3=tertiary
    pub stress: u8,
    /// Position within syllable
    pub syllable_position: SyllablePosition,
    /// Duration in milliseconds (if available)
    pub duration_ms: Option<f32>,
    /// Confidence score (0.0-1.0)
    ///
    /// The range is a convention; the constructors store the value as given.
    pub confidence: f32,
    /// Structured phonetic features
    pub phonetic_features: Option<PhoneticFeatures>,
    /// Optional custom features
    pub custom_features: Option<HashMap<String, String>>,
    /// Word boundary marker
    pub is_word_boundary: bool,
    /// Syllable boundary marker
    pub is_syllable_boundary: bool,
}
372
373impl Phoneme {
374    /// Create new phoneme with default values
375    pub fn new<S: Into<String>>(symbol: S) -> Self {
376        Self {
377            symbol: symbol.into(),
378            ipa_symbol: None,
379            language_notation: None,
380            stress: 0,
381            syllable_position: SyllablePosition::Standalone,
382            duration_ms: None,
383            confidence: 1.0,
384            phonetic_features: None,
385            custom_features: None,
386            is_word_boundary: false,
387            is_syllable_boundary: false,
388        }
389    }
390
391    /// Fast constructor for single-character phonemes (DummyG2p optimization)
392    #[inline]
393    pub fn from_char(c: char) -> Self {
394        Self {
395            symbol: c.to_string(),
396            ipa_symbol: None,
397            language_notation: None,
398            stress: 0,
399            syllable_position: SyllablePosition::Standalone,
400            duration_ms: None,
401            confidence: 1.0,
402            phonetic_features: None,
403            custom_features: None,
404            is_word_boundary: false,
405            is_syllable_boundary: false,
406        }
407    }
408
409    /// Create phoneme with stress and syllable position
410    pub fn with_stress<S: Into<String>>(
411        symbol: S,
412        stress: u8,
413        syllable_position: SyllablePosition,
414    ) -> Self {
415        Self {
416            symbol: symbol.into(),
417            ipa_symbol: None,
418            language_notation: None,
419            stress,
420            syllable_position,
421            duration_ms: None,
422            confidence: 1.0,
423            phonetic_features: None,
424            custom_features: None,
425            is_word_boundary: false,
426            is_syllable_boundary: false,
427        }
428    }
429
430    /// Create phoneme with confidence score
431    pub fn with_confidence<S: Into<String>>(symbol: S, confidence: f32) -> Self {
432        Self {
433            symbol: symbol.into(),
434            ipa_symbol: None,
435            language_notation: None,
436            stress: 0,
437            syllable_position: SyllablePosition::Standalone,
438            duration_ms: None,
439            confidence,
440            phonetic_features: None,
441            custom_features: None,
442            is_word_boundary: false,
443            is_syllable_boundary: false,
444        }
445    }
446
447    /// Create phoneme with duration in milliseconds
448    pub fn with_duration<S: Into<String>>(symbol: S, duration_ms: f32) -> Self {
449        Self {
450            symbol: symbol.into(),
451            ipa_symbol: None,
452            language_notation: None,
453            stress: 0,
454            syllable_position: SyllablePosition::Standalone,
455            duration_ms: Some(duration_ms),
456            confidence: 1.0,
457            phonetic_features: None,
458            custom_features: None,
459            is_word_boundary: false,
460            is_syllable_boundary: false,
461        }
462    }
463
464    /// Create phoneme with custom features
465    pub fn with_custom_features<S: Into<String>>(
466        symbol: S,
467        features: HashMap<String, String>,
468    ) -> Self {
469        Self {
470            symbol: symbol.into(),
471            ipa_symbol: None,
472            language_notation: None,
473            stress: 0,
474            syllable_position: SyllablePosition::Standalone,
475            duration_ms: None,
476            confidence: 1.0,
477            phonetic_features: None,
478            custom_features: Some(features),
479            is_word_boundary: false,
480            is_syllable_boundary: false,
481        }
482    }
483
484    /// Create a fully specified phoneme
485    #[allow(clippy::too_many_arguments)]
486    pub fn full<S: Into<String>>(
487        symbol: S,
488        ipa_symbol: Option<String>,
489        language_notation: Option<String>,
490        stress: u8,
491        syllable_position: SyllablePosition,
492        duration_ms: Option<f32>,
493        confidence: f32,
494        phonetic_features: Option<PhoneticFeatures>,
495        custom_features: Option<HashMap<String, String>>,
496        is_word_boundary: bool,
497        is_syllable_boundary: bool,
498    ) -> Self {
499        Self {
500            symbol: symbol.into(),
501            ipa_symbol,
502            language_notation,
503            stress,
504            syllable_position,
505            duration_ms,
506            confidence,
507            phonetic_features,
508            custom_features,
509            is_word_boundary,
510            is_syllable_boundary,
511        }
512    }
513
514    /// Create phoneme with IPA symbol
515    pub fn with_ipa<S: Into<String>, I: Into<String>>(symbol: S, ipa_symbol: I) -> Self {
516        Self {
517            symbol: symbol.into(),
518            ipa_symbol: Some(ipa_symbol.into()),
519            language_notation: None,
520            stress: 0,
521            syllable_position: SyllablePosition::Standalone,
522            duration_ms: None,
523            confidence: 1.0,
524            phonetic_features: None,
525            custom_features: None,
526            is_word_boundary: false,
527            is_syllable_boundary: false,
528        }
529    }
530
531    /// Create phoneme with phonetic features
532    pub fn with_phonetic_features<S: Into<String>>(symbol: S, features: PhoneticFeatures) -> Self {
533        Self {
534            symbol: symbol.into(),
535            ipa_symbol: None,
536            language_notation: None,
537            stress: 0,
538            syllable_position: SyllablePosition::Standalone,
539            duration_ms: None,
540            confidence: 1.0,
541            phonetic_features: Some(features),
542            custom_features: None,
543            is_word_boundary: false,
544            is_syllable_boundary: false,
545        }
546    }
547
548    /// Create word boundary marker
549    pub fn word_boundary() -> Self {
550        Self {
551            symbol: " ".to_string(),
552            ipa_symbol: None,
553            language_notation: None,
554            stress: 0,
555            syllable_position: SyllablePosition::Standalone,
556            duration_ms: None,
557            confidence: 1.0,
558            phonetic_features: None,
559            custom_features: None,
560            is_word_boundary: true,
561            is_syllable_boundary: false,
562        }
563    }
564
565    /// Create syllable boundary marker
566    pub fn syllable_boundary() -> Self {
567        Self {
568            symbol: ".".to_string(),
569            ipa_symbol: None,
570            language_notation: None,
571            stress: 0,
572            syllable_position: SyllablePosition::Standalone,
573            duration_ms: None,
574            confidence: 1.0,
575            phonetic_features: None,
576            custom_features: None,
577            is_word_boundary: false,
578            is_syllable_boundary: true,
579        }
580    }
581
582    /// Check if phoneme is a vowel based on phonetic features
583    pub fn is_vowel(&self) -> bool {
584        self.phonetic_features
585            .as_ref()
586            .and_then(|f| f.manner.as_ref())
587            .map(|m| m == "vowel")
588            .unwrap_or(false)
589    }
590
591    /// Check if phoneme is a consonant based on phonetic features
592    pub fn is_consonant(&self) -> bool {
593        self.phonetic_features
594            .as_ref()
595            .and_then(|f| f.manner.as_ref())
596            .map(|m| m != "vowel")
597            .unwrap_or(false)
598    }
599
600    /// Get effective symbol (IPA if available, otherwise main symbol)
601    pub fn effective_symbol(&self) -> &str {
602        self.ipa_symbol.as_ref().unwrap_or(&self.symbol)
603    }
604
605    /// Check if phoneme has primary stress
606    pub fn has_primary_stress(&self) -> bool {
607        self.stress == 1
608    }
609
610    /// Check if phoneme has secondary stress
611    pub fn has_secondary_stress(&self) -> bool {
612        self.stress == 2
613    }
614
615    /// Check if phoneme has any stress
616    pub fn has_stress(&self) -> bool {
617        self.stress > 0
618    }
619
620    /// Get duration or estimate based on phoneme type
621    pub fn duration_or_estimate(&self) -> f32 {
622        if let Some(duration) = self.duration_ms {
623            return duration;
624        }
625
626        // Provide rough duration estimates based on phoneme characteristics
627        if self.is_word_boundary || self.is_syllable_boundary {
628            return 0.0;
629        }
630
631        if self.is_vowel() {
632            120.0 // vowels are typically longer
633        } else if self.is_consonant() {
634            80.0 // consonants are typically shorter
635        } else {
636            100.0 // default estimate
637        }
638    }
639
640    /// Check if phoneme is at the beginning of a syllable
641    pub fn is_syllable_initial(&self) -> bool {
642        matches!(self.syllable_position, SyllablePosition::Onset)
643    }
644}
645
/// G2P metadata information
///
/// Descriptive record returned by [`G2p::metadata`]. It is plain data: nothing
/// in this type checks that the listed languages and the score keys agree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct G2pMetadata {
    /// Model name
    pub name: String,
    /// Model version
    pub version: String,
    /// Model description
    pub description: String,
    /// Supported languages
    pub supported_languages: Vec<LanguageCode>,
    /// Accuracy scores per language (if available)
    ///
    /// A language without an entry simply has no score recorded.
    pub accuracy_scores: HashMap<LanguageCode, f32>,
}
660
/// Trait for grapheme-to-phoneme conversion
///
/// Implementors must be `Send + Sync`. The trait is declared through
/// `#[async_trait]` so it can expose an `async fn` and still be used as a
/// trait object (`Box<dyn G2p>`), which is how backends are stored in this
/// file.
#[async_trait]
pub trait G2p: Send + Sync {
    /// Convert text to phonemes
    ///
    /// `lang` is the language of `text` when the caller knows it; `None`
    /// leaves the choice to the implementation. Failures are reported as
    /// `G2pError` through the crate's `Result` alias.
    async fn to_phonemes(&self, text: &str, lang: Option<LanguageCode>) -> Result<Vec<Phoneme>>;

    /// Get supported languages
    fn supported_languages(&self) -> Vec<LanguageCode>;

    /// Get model metadata
    fn metadata(&self) -> G2pMetadata;
}
673
// Public submodules of the crate. Each `pub mod name;` refers to a module whose
// source lives outside this file, so its contents are not visible here.
pub mod accuracy;
pub mod advanced;
pub mod backends;
pub mod config;
pub mod detection;
pub mod english;
pub mod languages;
pub mod models;
pub mod optimization;
pub mod performance;
pub mod phonology;
pub mod preprocessing;
pub mod rules;
pub mod ssml;
pub mod ssml_legacy;
pub mod streaming;
pub mod training;
pub mod utils;
692
/// Prelude for convenient imports
///
/// `use voirs_g2p::prelude::*;` brings in the core trait and data types
/// defined at the crate root, two backends from [`crate::backends`], and the
/// `async_trait` attribute needed to implement [`G2p`] outside this crate.
pub mod prelude {
    // Backends re-exported from the `backends` submodule.
    pub use crate::backends::{ChinesePinyinG2p, JapaneseDictG2p};
    // Core types and traits defined at the crate root.
    pub use crate::{
        DummyG2p, G2p, G2pConverter, G2pError, G2pMetadata, LanguageCode, Phoneme,
        PhoneticFeatures, Result, SyllablePosition,
    };
    // Re-exported so implementors of `G2p` do not need their own dependency
    // line just to name the attribute.
    pub use async_trait::async_trait;
}
702
703// Types are already public in the root module
704
/// G2P converter with multiple backend support
///
/// Routes each request to a backend registered for the requested language and
/// falls back to an optional default backend otherwise. The converter itself
/// implements [`G2p`], so it can be used wherever a single backend is expected.
pub struct G2pConverter {
    /// Backends registered for a specific language (at most one per language).
    backends: HashMap<LanguageCode, Box<dyn G2p>>,
    /// Backend used when no language is given or the language has no
    /// dedicated backend; `None` until one is set.
    default_backend: Option<Box<dyn G2p>>,
}
710
711impl G2pConverter {
712    /// Create new G2P converter
713    pub fn new() -> Self {
714        Self {
715            backends: HashMap::new(),
716            default_backend: None,
717        }
718    }
719
720    /// Add backend for specific language
721    pub fn add_backend(&mut self, language: LanguageCode, backend: Box<dyn G2p>) {
722        self.backends.insert(language, backend);
723    }
724
725    /// Set default backend for unknown languages
726    pub fn set_default_backend(&mut self, backend: Box<dyn G2p>) {
727        self.default_backend = Some(backend);
728    }
729
730    /// Get backend for language
731    fn get_backend(&self, language: Option<LanguageCode>) -> Result<&dyn G2p> {
732        if let Some(lang) = language {
733            if let Some(backend) = self.backends.get(&lang) {
734                return Ok(backend.as_ref());
735            }
736        }
737
738        if let Some(default) = &self.default_backend {
739            Ok(default.as_ref())
740        } else {
741            Err(G2pError::ConfigError(
742                "No G2P backend available".to_string(),
743            ))
744        }
745    }
746}
747
748impl Default for G2pConverter {
749    fn default() -> Self {
750        Self::new()
751    }
752}
753
754#[async_trait]
755impl G2p for G2pConverter {
756    async fn to_phonemes(&self, text: &str, lang: Option<LanguageCode>) -> Result<Vec<Phoneme>> {
757        let backend = self.get_backend(lang)?;
758        backend.to_phonemes(text, lang).await
759    }
760
761    fn supported_languages(&self) -> Vec<LanguageCode> {
762        let mut languages: Vec<LanguageCode> = self.backends.keys().copied().collect();
763
764        // Add languages from default backend if available
765        if let Some(default) = &self.default_backend {
766            languages.extend(default.supported_languages());
767        }
768
769        languages.sort();
770        languages.dedup();
771        languages
772    }
773
774    fn metadata(&self) -> G2pMetadata {
775        let mut accuracy_scores = HashMap::new();
776
777        // Collect accuracy scores from all backends
778        for (lang, backend) in &self.backends {
779            let backend_metadata = backend.metadata();
780            if let Some(score) = backend_metadata.accuracy_scores.get(lang) {
781                accuracy_scores.insert(*lang, *score);
782            }
783        }
784
785        // Add default accuracy scores for backends if not provided
786        for lang in self.supported_languages() {
787            accuracy_scores.entry(lang).or_insert_with(|| {
788                match lang {
789                    LanguageCode::EnUs | LanguageCode::EnGb => 0.85, // English typically higher
790                    LanguageCode::De
791                    | LanguageCode::Fr
792                    | LanguageCode::Es
793                    | LanguageCode::It
794                    | LanguageCode::Pt => 0.80, // European languages
795                    LanguageCode::Ja | LanguageCode::Ru | LanguageCode::Ar => 0.75, // Complex phonology
796                    LanguageCode::ZhCn | LanguageCode::Ko => 0.70, // CJK languages are challenging
797                }
798            });
799        }
800
801        G2pMetadata {
802            name: "VoiRS G2P Converter".to_string(),
803            version: env!("CARGO_PKG_VERSION").to_string(),
804            description: "Multi-backend grapheme-to-phoneme converter".to_string(),
805            supported_languages: self.supported_languages(),
806            accuracy_scores,
807        }
808    }
809}
810
/// Dummy G2P backend for testing and fallback
///
/// Performs no real phonemization: its conversion turns each alphabetic
/// character of the input into one phoneme.
pub struct DummyG2p {
    /// Languages this backend reports as supported.
    supported_langs: Vec<LanguageCode>,
}
815
816impl DummyG2p {
817    /// Create new dummy G2P backend
818    pub fn new() -> Self {
819        Self {
820            supported_langs: vec![LanguageCode::EnUs],
821        }
822    }
823
824    /// Create with custom supported languages
825    pub fn with_languages(languages: Vec<LanguageCode>) -> Self {
826        Self {
827            supported_langs: languages,
828        }
829    }
830}
831
832impl Default for DummyG2p {
833    fn default() -> Self {
834        Self::new()
835    }
836}
837
838#[async_trait]
839impl G2p for DummyG2p {
840    async fn to_phonemes(&self, text: &str, _lang: Option<LanguageCode>) -> Result<Vec<Phoneme>> {
841        // Simple character-to-phoneme mapping for testing
842        let phonemes: Vec<Phoneme> = text
843            .chars()
844            .filter(|c| c.is_alphabetic())
845            .map(Phoneme::from_char)
846            .collect();
847
848        // Debug logging only when trace level is enabled to avoid performance impact
849        if tracing::enabled!(tracing::Level::TRACE) {
850            tracing::trace!(
851                "DummyG2p: Generated {} phonemes for '{}'",
852                phonemes.len(),
853                text
854            );
855        }
856        Ok(phonemes)
857    }
858
859    fn supported_languages(&self) -> Vec<LanguageCode> {
860        // Return cloned vector - optimized for common single-language case
861        self.supported_langs.clone()
862    }
863
864    fn metadata(&self) -> G2pMetadata {
865        G2pMetadata {
866            name: "Dummy G2P".to_string(),
867            version: "0.1.0".to_string(),
868            description: "Dummy G2P backend for testing".to_string(),
869            supported_languages: self.supported_languages(),
870            accuracy_scores: HashMap::new(),
871        }
872    }
873}
874
875// Implement G2p for Box<dyn G2p> to enable trait object usage
876#[async_trait]
877impl G2p for Box<dyn G2p> {
878    async fn to_phonemes(&self, text: &str, lang: Option<LanguageCode>) -> Result<Vec<Phoneme>> {
879        self.as_ref().to_phonemes(text, lang).await
880    }
881
882    fn supported_languages(&self) -> Vec<LanguageCode> {
883        self.as_ref().supported_languages()
884    }
885
886    fn metadata(&self) -> G2pMetadata {
887        self.as_ref().metadata()
888    }
889}
890
// Unit tests for the converter, the dummy backend and the English rule-based
// backend. All tests are async and run under the tokio test runtime.
#[cfg(test)]
mod tests {
    use super::*;

    // The converter routes a request to the backend registered for the
    // requested language and reports that language as supported.
    #[tokio::test]
    async fn test_g2p_converter() {
        let mut converter = G2pConverter::new();

        // Add dummy backend for English
        converter.add_backend(LanguageCode::EnUs, Box::new(DummyG2p::new()));

        // Test conversion
        let phonemes = converter
            .to_phonemes("hello", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert_eq!(phonemes.len(), 5); // h-e-l-l-o

        // Test supported languages
        let languages = converter.supported_languages();
        assert!(languages.contains(&LanguageCode::EnUs));
    }

    // The dummy backend emits one phoneme per letter, in input order, and
    // works without a language argument.
    #[tokio::test]
    async fn test_dummy_g2p() {
        let g2p = DummyG2p::new();

        let phonemes = g2p.to_phonemes("test", None).await.unwrap();
        assert_eq!(phonemes.len(), 4);
        assert_eq!(phonemes[0].symbol, "t");
        assert_eq!(phonemes[1].symbol, "e");

        let languages = g2p.supported_languages();
        assert_eq!(languages, vec![LanguageCode::EnUs]);
    }

    // Basic behaviour of the English rule-based backend, which is implemented
    // in `crate::rules` (not shown in this file).
    #[tokio::test]
    async fn test_english_rule_g2p() {
        use crate::rules::EnglishRuleG2p;

        let g2p = EnglishRuleG2p::new().unwrap();

        // Test basic dictionary words - "the" should split into ð and ə
        let phonemes = g2p
            .to_phonemes("the", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert_eq!(phonemes.len(), 2);
        assert_eq!(phonemes[0].symbol, "ð");
        assert_eq!(phonemes[1].symbol, "ə");

        // Test rule-based conversion
        let phonemes = g2p
            .to_phonemes("cat", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert_eq!(phonemes.len(), 3);
        assert_eq!(phonemes[0].symbol, "k"); // c -> k
        assert_eq!(phonemes[1].symbol, "æ"); // a -> æ
        assert_eq!(phonemes[2].symbol, "t"); // t -> t

        // Test multiple words
        let phonemes = g2p
            .to_phonemes("hello world", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert!(phonemes.len() > 5); // Multiple phonemes with word boundary

        // Test supported languages
        let languages = g2p.supported_languages();
        assert!(languages.contains(&LanguageCode::EnUs));
        assert!(languages.contains(&LanguageCode::EnGb));
    }

    // Multi-letter spelling patterns handled by the English rule-based backend.
    #[tokio::test]
    async fn test_english_rule_g2p_vowel_patterns() {
        use crate::rules::EnglishRuleG2p;

        let g2p = EnglishRuleG2p::new().unwrap();

        // Test magic-e pattern
        // NOTE(review): the expectation is a single phoneme "eɪk" for "cake",
        // i.e. the leading "c" contributes no phoneme of its own. Confirm this
        // is the intended rule output and not an artifact being pinned.
        let phonemes = g2p
            .to_phonemes("cake", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert_eq!(phonemes.len(), 1);
        assert_eq!(phonemes[0].symbol, "eɪk"); // ake -> eɪk

        // Test consonant digraphs
        let phonemes = g2p
            .to_phonemes("ship", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert_eq!(phonemes.len(), 3);
        assert_eq!(phonemes[0].symbol, "ʃ"); // sh -> ʃ
        assert_eq!(phonemes[1].symbol, "ɪ"); // i -> ɪ
        assert_eq!(phonemes[2].symbol, "p"); // p -> p

        // Test vowel combinations
        let phonemes = g2p
            .to_phonemes("tree", Some(LanguageCode::EnUs))
            .await
            .unwrap();
        assert_eq!(phonemes.len(), 3);
        assert_eq!(phonemes[0].symbol, "t"); // t -> t
        assert_eq!(phonemes[1].symbol, "r"); // r -> r
        assert_eq!(phonemes[2].symbol, "iː"); // ee -> iː
    }
}