Expand description
§VoiRS Acoustic Models
Neural acoustic models for converting phonemes to mel spectrograms. Supports VITS, FastSpeech2, and other state-of-the-art architectures.
Re-exports§
pub use backends::Backend;
pub use backends::BackendManager;
pub use batch_processor::BatchProcessingStats;
pub use batch_processor::BatchProcessor;
pub use batch_processor::BatchProcessorConfig;
pub use batch_processor::BatchProcessorTrait;
pub use batch_processor::BatchRequest;
pub use batch_processor::ErrorStats;
pub use batch_processor::MemoryStats;
pub use batch_processor::QueueStats;
pub use batch_processor::RequestPriority;
pub use memory::lazy::ComponentRegistry;
pub use memory::lazy::LazyComponent;
pub use memory::lazy::MemmapFile;
pub use memory::lazy::MemoryPressureHandler;
pub use memory::lazy::MemoryPressureLevel;
pub use memory::lazy::MemoryPressureStatus;
pub use memory::lazy::ProgressiveLoader;
pub use memory::AdvancedPerformanceProfiler;
pub use memory::MemoryOptimizer;
pub use memory::OperationTimer;
pub use memory::PerformanceMetrics;
pub use memory::PerformanceMonitor;
pub use memory::PerformanceReport;
pub use memory::PerformanceSnapshot;
pub use memory::PerformanceThresholds;
pub use memory::PoolStats;
pub use memory::ResultCache;
pub use memory::SystemInfo;
pub use memory::SystemMemoryInfo;
pub use memory::TensorMemoryPool;
pub use metrics::EvaluationConfig;
pub use metrics::EvaluationPreset;
pub use metrics::MetricStatistics;
pub use metrics::ObjectiveEvaluator;
pub use metrics::ObjectiveMetrics;
pub use metrics::PerceptualEvaluator;
pub use metrics::PerceptualMetrics;
pub use metrics::ProsodyEvaluator;
pub use metrics::ProsodyFeatures;
pub use metrics::ProsodyMetrics;
pub use metrics::QualityEvaluator;
pub use metrics::QualityMetrics;
pub use metrics::QualityStatistics;
pub use metrics::RhythmFeatures;
pub use metrics::WindowType;
pub use models::DummyAcousticConfig;
pub use models::DummyAcousticModel;
pub use models::ModelLoader;
pub use optimization::DistillationConfig;
pub use optimization::DistillationStrategy;
pub use optimization::HardwareOptimization;
pub use optimization::HardwareTarget;
pub use optimization::ModelOptimizer;
pub use optimization::OptimizationConfig;
pub use optimization::OptimizationMetrics;
pub use optimization::OptimizationReport;
pub use optimization::OptimizationTargets;
pub use optimization::PruningConfig;
pub use optimization::PruningStrategy;
pub use optimization::PruningType;
pub use optimization::QuantizationConfig as OptQuantizationConfig;
pub use optimization::QuantizationMethod as OptQuantizationMethod;
pub use optimization::QuantizationPrecision as OptQuantizationPrecision;
pub use prosody::DurationConfig;
pub use prosody::EnergyConfig;
pub use prosody::EnergyContourPattern;
pub use prosody::IntonationPattern;
pub use prosody::PauseDurations;
pub use prosody::PitchConfig;
pub use prosody::ProsodyAdjustment;
pub use prosody::ProsodyConfig;
pub use prosody::ProsodyController;
pub use prosody::RhythmPattern;
pub use prosody::VibratoConfig;
pub use prosody::VoiceQualityConfig;
pub use quantization::ModelQuantizer;
pub use quantization::QuantizationBenchmark;
pub use quantization::QuantizationConfig;
pub use quantization::QuantizationMethod;
pub use quantization::QuantizationParams;
pub use quantization::QuantizationPrecision;
pub use quantization::QuantizationStats;
pub use quantization::QuantizedTensor;
pub use simd::Complex;
pub use simd::FftWindow;
pub use simd::SimdAudioEffects;
pub use simd::SimdAudioProcessor;
pub use simd::SimdCapabilities;
pub use simd::SimdDispatcher;
pub use simd::SimdFft;
pub use simd::SimdLinearLayer;
pub use simd::SimdMatrix;
pub use simd::SimdMelComputer;
pub use simd::SimdStft;
pub use simd::StftWindow;
pub use simd::WindowFunction;
pub use singing::ArticulationMarking;
pub use singing::BreathControlConfig;
pub use singing::DynamicsMarking;
pub use singing::FormantAdjustment;
pub use singing::KeySignature;
pub use singing::MusicalNote;
pub use singing::MusicalPhrase;
pub use singing::ResonanceConfig;
pub use singing::SingingConfig;
pub use singing::SingingTechnique;
pub use singing::SingingVibratoConfig;
pub use singing::SingingVoiceSynthesizer;
pub use singing::VocalRegister;
pub use singing::VoiceType;
pub use speaker::Accent;
pub use speaker::AgeGroup;
pub use speaker::AudioFeatures;
pub use speaker::AudioReference;
pub use speaker::CloningQualityMetrics;
pub use speaker::CrossLanguageSpeakerAdapter;
pub use speaker::EmotionConfig;
pub use speaker::EmotionModel;
pub use speaker::EmotionType;
pub use speaker::FewShotSpeakerAdaptation;
pub use speaker::Gender;
pub use speaker::MultiSpeakerConfig;
pub use speaker::MultiSpeakerModel;
pub use speaker::PersonalityTrait;
pub use speaker::SpeakerEmbedding;
pub use speaker::SpeakerId;
pub use speaker::SpeakerMetadata;
pub use speaker::SpeakerRegistry;
pub use speaker::SpeakerVerificationResult;
pub use speaker::SpeakerVerifier;
pub use speaker::VoiceCharacteristics;
pub use speaker::VoiceCloningConfig;
pub use speaker::VoiceCloningQualityAssessor;
pub use speaker::VoiceQuality;
pub use streaming::LatencyOptimizer;
pub use streaming::LatencyOptimizerConfig;
pub use streaming::LatencyStats;
pub use streaming::LatencyStrategy;
pub use streaming::PerformanceMeasurement;
pub use streaming::PerformancePredictor;
pub use streaming::StreamingConfig;
pub use streaming::StreamingMetrics;
pub use streaming::StreamingState;
pub use streaming::StreamingSynthesizer;
pub use traits::AcousticModel;
pub use traits::AcousticModelFeature;
pub use traits::AcousticModelMetadata;
pub use vits::TextEncoder;
pub use vits::TextEncoderConfig;
pub use vits::VitsConfig;
pub use vits::VitsModel;
pub use vits::VitsStreamingState;
pub use latency_optimizer::ChunkStrategy;
pub use latency_optimizer::LatencyBudget;
pub use latency_optimizer::LatencyMeasurement;
pub use latency_optimizer::LatencyOptimizer as AdvancedLatencyOptimizer;
pub use latency_optimizer::LatencyStatistics;
pub use latency_optimizer::ProcessingPriority;
pub use neural_codec::CodecQualityMetrics;
pub use neural_codec::CodecType;
pub use neural_codec::NeuralCodec;
pub use neural_codec::NeuralCodecConfig;
pub use vad::VadConfig;
pub use vad::VadSegment;
pub use vad::VoiceActivity;
pub use vad::VoiceActivityDetector;
pub use config::*;
pub use mel::*;
Modules§
- acoustic_utils
- Acoustic Processing Utilities
- backends
- Backend implementations for acoustic models
- batch_processor
- Advanced Batch Processing System for VoiRS Acoustic Models
- batching
- Dynamic batching system for variable-length sequences
- cache
- Advanced caching strategies for acoustic synthesis
- conditioning
- Conditional layers for feature-controlled synthesis.
- config
- Configuration management for acoustic models
- diagnostics
- Advanced diagnostics and analysis tools for acoustic modeling
- error
- Enhanced error handling with diagnostic context and recovery suggestions
- fastspeech
- FastSpeech2 implementation.
- fastspeech2_trainer
- FastSpeech2 model training infrastructure
- fusion
- Kernel Fusion Optimization Module
- latency_optimizer
- Advanced Latency Optimization for Real-Time TTS
- mel
- Mel spectrogram computation and processing
- memory
- Advanced memory management utilities for efficient acoustic model inference
- metrics
- Audio quality metrics for TTS evaluation
- model_manager
- Auto-generated module structure
- model_warmup
- Model warmup and preloading utilities
- models
- Acoustic model definitions and management.
- neural_codec
- Neural Audio Codec Integration
- optimization
- Model optimization techniques for efficient inference
- parallel_attention
- Parallel attention computation for improved performance
- performance_targets
- Performance targets monitoring and validation system
- prelude
- Prelude for convenient imports
- production
- Production hardening features for reliable acoustic synthesis
- production_monitoring
- Advanced production monitoring and observability
- profiling
- Advanced Performance Profiling and Tracing System
- profiling_integration
- Integration between Performance Profiling and Production Monitoring
- prosody
- Prosody control for natural speech synthesis.
- quantization
- Model quantization utilities for compression and optimization
- scirs2_ops
- SciRS2-Optimized Acoustic Operations
- simd
- SIMD-accelerated operations for acoustic processing
- singing
- Singing voice synthesis functionality for acoustic models.
- speaker
- Speaker control and voice characteristics management.
- streaming
- Streaming synthesis infrastructure for real-time text-to-speech
- synthesis_cache
- Advanced synthesis result caching system
- traits
- Core traits for acoustic models
- unified_conditioning
- Unified conditioning interface for all synthesis features.
- utils
- Utility functions for acoustic modeling.
- vad
- Voice Activity Detection (VAD) Integration
- vits
- VITS (Variational Inference Text-to-Speech) model implementation
Structs§
- AcousticModelManager
- Acoustic model manager with multiple architecture support
- MelSpectrogram
- Mel spectrogram representation
- Phoneme
- A phoneme with its symbol and optional features
- SynthesisConfig
- Simple synthesis configuration for basic operations
Enums§
- AcousticError
- Acoustic model specific error types with enhanced diagnostic information
- LanguageCode
- Language codes supported by VoiRS
Type Aliases§
- Result
- Result type for acoustic model operations