Expand description
§VoiRS Singing Voice Synthesis System
This crate provides comprehensive singing voice synthesis capabilities including musical note processing, pitch contour generation, rhythm control, vibrato modeling, and musical format support.
§Quick Start
ⓘ
use voirs_singing::prelude::*;
#[tokio::main]
async fn main() -> Result<()> {
// Create a singing engine with default configuration
let config = SingingConfig::default();
let engine = SingingEngine::new(config).await?;
// Create a simple musical score
let mut score = MusicalScore::new();
score.add_note(MusicalNote::new(60, 1.0, 1.0))?; // Middle C, 1 second
// Synthesize the singing voice
let request = SingingRequest::new(score, VoiceType::Soprano);
let response = engine.synthesize(request).await?;
println!("Generated {} samples", response.audio_data.len());
Ok(())
}
§Core Components
- SingingEngine: Main engine for voice synthesis
- MusicalScore: Musical notation and timing
- VoiceCharacteristics: Voice parameters and qualities
- EffectChain: Audio effects processing
- SynthesisProcessor: Core synthesis algorithms
§Advanced Features
§Voice Cloning and Style Transfer
ⓘ
let style_transfer = StyleTransfer::new();
let target_style = StyleEmbedding::from_voice_samples(&voice_samples)?;
let result = style_transfer.apply_style(&audio_input, &target_style).await?;
§Real-time Performance
ⓘ
let realtime_config = RealtimeConfig::low_latency();
let session = LiveSession::new(realtime_config).await?;
// Process notes in real-time
let note = RealtimeNote::new(60, 0.5, VoiceType::Tenor);
session.play_note(note).await?;
§Musical Intelligence
The crate includes advanced musical analysis capabilities:
- Chord Recognition: Automatic chord detection from audio
- Key Detection: Musical key identification
- Rhythm Analysis: Beat and tempo detection
- Scale Analysis: Musical scale recognition
Re-exports§
pub use adaptive_learning::AdaptiveLearningConfig;pub use adaptive_learning::AdaptiveLearningSystem;pub use adaptive_learning::ArticulationParams;pub use adaptive_learning::DynamicsParams;pub use adaptive_learning::LearningStatistics;pub use adaptive_learning::ModelImprovement;pub use adaptive_learning::PersonalizedRecommendations;pub use adaptive_learning::QualityRatings;pub use adaptive_learning::QualityWeights;pub use adaptive_learning::StyleAdaptation;pub use adaptive_learning::UserFeedback;pub use adaptive_learning::UserPreferences;pub use adaptive_learning::VibratoParams;pub use advanced_techniques::AdvancedArticulationProcessor;pub use advanced_techniques::AdvancedDynamicsProcessor;pub use advanced_techniques::AdvancedTechniques;pub use advanced_techniques::BendCurve;pub use advanced_techniques::GraceNoteProcessor;pub use advanced_techniques::MelismaProcessor;pub use advanced_techniques::PitchBendProcessor;pub use advanced_techniques::PitchBendSettings;pub use advanced_techniques::RunPattern;pub use advanced_techniques::VocalRunProcessor;pub use advanced_techniques::VocalRunSettings;pub use ai::AutoHarmonizer;pub use ai::EmotionRecognizer;pub use ai::EmotionResult;pub use ai::ExpressionFeatures;pub use ai::HarmonyModel;pub use ai::HarmonyRules;pub use ai::ImprovisationAssistant;pub use ai::StyleEmbedding;pub use ai::StyleMetadata;pub use ai::StyleTransfer;pub use ai::StyleTransferConfig;pub use ai::StyleTransferResult;pub use ai::TransferQualityMetrics;pub use audio_processing::DynamicRangeProcessor;pub use audio_processing::HighQualityResampler;pub use audio_processing::InterpolationMethod;pub use audio_processing::PanLaw;pub use audio_processing::PhaseCoherenceProcessor;pub use audio_processing::QualityLevel;pub use audio_processing::StereoImagingProcessor;pub use cloud_deployment::CloudConfig;pub use cloud_deployment::CloudDeploymentManager;pub use cloud_deployment::ClusterStats;pub use cloud_deployment::JobStatus;pub use 
cloud_deployment::LoadBalancingStrategy;pub use cloud_deployment::QualityTier;pub use cloud_deployment::SynthesisJob;pub use cloud_deployment::WorkerNode;pub use cloud_deployment::WorkerStatus;pub use composition_assistant::CompositionAssistant;pub use composition_assistant::CompositionConfig;pub use composition_assistant::GeneratedHarmony;pub use composition_assistant::GeneratedMelody;pub use composition_assistant::HarmonyRequest;pub use composition_assistant::ImprovementSuggestion;pub use composition_assistant::MelodicAnalysis;pub use composition_assistant::MelodyPrompt;pub use composition_assistant::MusicalArrangement;pub use composition_assistant::RhythmPattern;pub use composition_assistant::SuggestionType;pub use config::SingingConfig;pub use config::SingingConfigBuilder;pub use core::SingingEngine;pub use core::SingingEngineBuilder;pub use custom_score::OptimizedScore;pub use custom_score::PerformanceHints;pub use custom_score::ScoreOptimizer;pub use effects::EffectChain;pub use effects::EffectProcessor;pub use effects::SingingEffect;pub use formats::FormatParser;pub use formats::MidiParser;pub use formats::MusicXmlParser;pub use gpu_acceleration::DeviceType;pub use gpu_acceleration::GpuAccelerated;pub use gpu_acceleration::GpuAccelerator;pub use gpu_acceleration::GpuConfig;pub use gpu_acceleration::GpuConfigBuilder;pub use gpu_acceleration::GpuError;pub use gpu_acceleration::MemoryUsage;pub use gpu_acceleration::TensorMemoryPool;pub use granular_synthesis::GrainEnvelope;pub use granular_synthesis::GranularConfig;pub use granular_synthesis::GranularSynthesisEffect;pub use granular_synthesis::GranularTexture;pub use granular_synthesis::WindowFunction;pub use harmony::HarmonyArrangement;pub use harmony::HarmonyType;pub use harmony::MultiVoiceSynthesizer;pub use harmony::VoicePart;pub use historical_practice::ArticulationStyle;pub use historical_practice::ExpressionStyle;pub use historical_practice::HistoricalPeriod;pub use 
historical_practice::HistoricalPractice;pub use historical_practice::OrnamentsEngine;pub use historical_practice::OrnamentsStyle;pub use historical_practice::PeriodStyle;pub use historical_practice::RegionalStyle;pub use historical_practice::TuningSystem;pub use historical_practice::VibratoStyle;pub use llm_understanding::ArticulationRecommendation;pub use llm_understanding::DynamicSuggestion;pub use llm_understanding::InstructionInterpretation;pub use llm_understanding::LlmConfig;pub use llm_understanding::LlmMusicalUnderstanding;pub use llm_understanding::LlmResponse;pub use llm_understanding::MusicalContext;pub use llm_understanding::MusicalPrompt;pub use llm_understanding::PhrasingBoundary;pub use models::ModelType;pub use models::SingingModel;pub use models::SingingModelBuilder;pub use models::TransformerConfig;pub use models::TransformerSynthesisModel;pub use models::VoiceModel;pub use multimodal::BlendShapeWeights;pub use multimodal::HeadMotion;pub use multimodal::MultimodalResult;pub use multimodal::MultimodalSynthesizer;pub use multimodal::PhonemeTiming;pub use multimodal::SimpleNote;pub use multimodal::Viseme;pub use multimodal::VisualConfig;pub use multimodal::VisualFrame;pub use multimodal::VisualTimeline;pub use musical_intelligence::ChordQuality;pub use musical_intelligence::ChordRecognizer;pub use musical_intelligence::ChordResult;pub use musical_intelligence::KeyDetector;pub use musical_intelligence::KeyMode;pub use musical_intelligence::KeyResult;pub use musical_intelligence::MusicalAnalysis;pub use musical_intelligence::MusicalIntelligence;pub use musical_intelligence::RhythmAnalyzer;pub use musical_intelligence::RhythmResult;pub use musical_intelligence::ScaleAnalyzer;pub use musical_intelligence::ScaleResult;pub use perceptual_quality::ComprehensiveQualityReport;pub use perceptual_quality::ExpressionReport;pub use perceptual_quality::NaturalnessReport;pub use perceptual_quality::PerceptualQualityTester;pub use 
perceptual_quality::PerformanceReport;pub use perceptual_quality::VoiceQualityReport;pub use performance_optimization::CompressionAlgorithm;pub use performance_optimization::CompressionEngine;pub use performance_optimization::EvictionPolicy;pub use performance_optimization::PrecomputationEngine;pub use performance_optimization::StreamingEngine;pub use performance_optimization::StreamingQuality;pub use performance_optimization::VoiceCache;pub use performance_optimization_advanced::ArchitectureCandidate;pub use performance_optimization_advanced::CompressedModel;pub use performance_optimization_advanced::DistillationConfig;pub use performance_optimization_advanced::DistillationResult;pub use performance_optimization_advanced::EnergyEfficiencyConfig;pub use performance_optimization_advanced::EnergyEfficiencyOptimizer;pub use performance_optimization_advanced::EnergyOptimizationResult;pub use performance_optimization_advanced::HardwareOptimizationResult;pub use performance_optimization_advanced::HardwareOptimizer;pub use performance_optimization_advanced::HardwareOptimizerConfig;pub use performance_optimization_advanced::HardwarePlatform;pub use performance_optimization_advanced::KnowledgeDistiller;pub use performance_optimization_advanced::ModelCompressionConfig;pub use performance_optimization_advanced::ModelCompressor;pub use performance_optimization_advanced::NasConfig;pub use performance_optimization_advanced::NeuralArchitectureSearcher;pub use performance_optimization_advanced::QatConfig;pub use performance_optimization_advanced::QatTrainingResult;pub use performance_optimization_advanced::QuantizationAwareTrainer;pub use physical_modeling::AdvancedVocalTractModel;pub use physical_modeling::Complex32;pub use physical_modeling::PhysicalModelConfig;pub use physical_modeling::PhysicsAccuracyLevel;pub use physical_modeling::VocalTractModel;pub use physical_modeling::VowelPreset;pub use pitch::PitchContour;pub use pitch::PitchGenerator;pub use pitch::PitchProcessor;pub 
use pitch_simd::SimdPitchContourGenerator;pub use pitch_simd::SimdPitchProcessor;pub use precision_quality::ExpressionRecognitionReport;pub use precision_quality::NaturalnessScoreReport;pub use precision_quality::PitchAccuracyReport;pub use precision_quality::PrecisionQualityAnalyzer;pub use precision_quality::TimingAccuracyReport;pub use realtime::LiveSession;pub use realtime::RealtimeConfig;pub use realtime::RealtimeEngine;pub use realtime::RealtimeNote;pub use research_integration::ABTestManager;pub use research_integration::AdvancedCodecConfig;pub use research_integration::AdvancedNeuralCodec;pub use research_integration::BatchingStrategy;pub use research_integration::BottleneckDetector;pub use research_integration::BottleneckReport;pub use research_integration::BottleneckSeverity;pub use research_integration::CacheStats;pub use research_integration::CacheStrategy;pub use research_integration::CircuitBreaker;pub use research_integration::CircuitBreakerConfig;pub use research_integration::CircuitBreakerStats;pub use research_integration::CircuitState;pub use research_integration::CodebookUsageStats;pub use research_integration::CodecMetrics as Phase4CodecMetrics;pub use research_integration::CodecStats;pub use research_integration::CodecTokens;pub use research_integration::ComponentHealth;pub use research_integration::CompressionStats;pub use research_integration::ConditioningType;pub use research_integration::ConsistencyModel;pub use research_integration::ConsistencyModelConfig;pub use research_integration::CouplingMatrix;pub use research_integration::DiffusionTransformer;pub use research_integration::DiffusionTransformerConfig;pub use research_integration::DiffusionTransformerInfo;pub use research_integration::DistillationSchedule;pub use research_integration::ExecutionTrace;pub use research_integration::Experiment;pub use research_integration::ExperimentMetrics;pub use research_integration::ExperimentResults;pub use research_integration::ExperimentStatus;pub 
use research_integration::FallbackStrategy;pub use research_integration::FieldInterpolation;pub use research_integration::FlowMatchingConfig;pub use research_integration::FlowMatchingObjective;pub use research_integration::FlowMatchingSynthesizer;pub use research_integration::GracefulDegradationManager;pub use research_integration::HealthCheckThresholds;pub use research_integration::HealthChecker;pub use research_integration::HealthStatus;pub use research_integration::HistogramStats;pub use research_integration::HotReloader;pub use research_integration::ImprovedRVQ;pub use research_integration::InferenceConfig;pub use research_integration::InferenceMetrics;pub use research_integration::IntegrationMethod;pub use research_integration::InterpolationMethod as VelocityInterpolationMethod;pub use research_integration::LatencyOptimizer;pub use research_integration::LatencyStats;pub use research_integration::MetricMetadata;pub use research_integration::MetricType;pub use research_integration::MonitoringStats;pub use research_integration::MultiScaleDiscriminator;pub use research_integration::NeuralCodecConfig;pub use research_integration::NeuralCodecLanguageModel;pub use research_integration::NoiseSchedule;pub use research_integration::OpenTelemetryTracer;pub use research_integration::OptimalControl;pub use research_integration::OptimalTransportConfig;pub use research_integration::OptimalTransportFlow;pub use research_integration::PerceptualLoss;pub use research_integration::PerformanceProfiler;pub use research_integration::PipelineStage;pub use research_integration::PipelineStatistics;pub use research_integration::PipelineVisualizer;pub use research_integration::ProductionMonitor;pub use research_integration::ProfileData;pub use research_integration::ProfileSession;pub use research_integration::ProfilingReport;pub use research_integration::PrometheusMetrics;pub use research_integration::QualityLevel as DegradationQualityLevel;pub use 
research_integration::RealtimeInferenceEngine;pub use research_integration::RetryExecutor;pub use research_integration::RetryPolicy;pub use research_integration::SamplingMethod;pub use research_integration::ScoreBasedConfig;pub use research_integration::ScoreBasedModel;pub use research_integration::Span;pub use research_integration::SpanEvent;pub use research_integration::SpanStatus;pub use research_integration::StageMetrics;pub use research_integration::StageType;pub use research_integration::TrajectoryOptimization;pub use research_integration::TransportMethod;pub use research_integration::TransportPlan;pub use research_integration::Variant;pub use research_integration::VariantResult;pub use research_integration::VelocityConfig;pub use research_integration::VelocityEstimation;pub use research_integration::VelocityFieldPredictor;pub use research_integration::WassersteinDistance;pub use rhythm::RhythmGenerator;pub use rhythm::RhythmProcessor;pub use rhythm::TimingController;pub use scalability::LargeScaleSynthesisRequest;pub use scalability::LargeScaleSynthesisResult;pub use scalability::MultiVoiceCoordinator;pub use scalability::PerformanceMetrics;pub use scalability::ScalabilityConfig;pub use scalability::ScalabilityManager;pub use scalability::ScalabilityStatus;pub use scalability::SessionRequirements;pub use scalability::VoiceRequirements;pub use score::KeySignature;pub use score::Mode;pub use score::MusicalNote;pub use score::MusicalScore;pub use score::Note;pub use score::ScoreProcessor;pub use score::TimeSignature;pub use score_rendering::RenderConfig;pub use score_rendering::RenderFormat;pub use score_rendering::ScoreRenderer;pub use score_rendering::ScoreRendererBuilder;pub use score_rendering::StaffPosition;pub use styles::CulturalVariant;pub use styles::MusicalStyle;pub use styles::Ornamentation;pub use styles::PerformanceGuidelines;pub use styles::PhraseShaping;pub use styles::StyleCharacteristics;pub use styles::TimbreQualities;pub use styles::VoiceType 
as StyleVoiceType;pub use synthesis::PrecisionMetricsReport;pub use synthesis::PrecisionTargets;pub use synthesis::SynthesisEngine;pub use synthesis::SynthesisProcessor;pub use synthesis::SynthesisResult;pub use techniques::BreathControl;pub use techniques::LegatoProcessor;pub use techniques::SingingTechnique;pub use techniques::VibratoProcessor;pub use techniques::VocalFry;pub use types::Expression;pub use types::NoteEvent;pub use types::QualitySettings;pub use types::SingingRequest;pub use types::SingingResponse;pub use types::SingingStats;pub use types::VoiceCharacteristics;pub use types::VoiceType;pub use vocal_effects::AutoTuneEffect;pub use vocal_effects::ChoirEffect;pub use vocal_effects::HarmonyGenerator;pub use vocal_effects::ScaleType;pub use vocal_effects::VocoderEffect;pub use vocal_effects::VoiceArrangement;pub use vocal_effects::VoicePartType;pub use vocal_effects::VoicingRules;pub use voice::VoiceBank;pub use voice::VoiceController;pub use voice::VoiceManager;pub use voice_blending::BlendConfig;pub use voice_blending::BlendState;pub use voice_blending::VoiceBlender;pub use voice_blending::VoiceMorphParams;pub use voice_conversion::ConversionMethod;pub use voice_conversion::ConversionQuality;pub use voice_conversion::ConversionQualityMetrics;pub use voice_conversion::ConversionRequest;pub use voice_conversion::ConversionResult;pub use voice_conversion::ConversionSource;pub use voice_conversion::SpeakerEmbedding;pub use voice_conversion::VoiceConverter;pub use voice_conversion::VoiceQualityMetrics;pub use zero_shot::AdaptationMethod;pub use zero_shot::AdaptationMetrics;pub use zero_shot::AudioSample;pub use zero_shot::QualityMode;pub use zero_shot::ReferenceVoice;pub use zero_shot::TargetVoiceSpec;pub use zero_shot::VocalRange;pub use zero_shot::ZeroShotConfig;pub use zero_shot::ZeroShotRequest;pub use zero_shot::ZeroShotResult;pub use zero_shot::ZeroShotSynthesizer;
Modules§
- adaptive_learning
- Adaptive Learning System
- advanced_techniques
- Advanced singing techniques and vocal processing
- ai
- AI-Driven Features for Singing Voice Synthesis
- audio_processing
- Advanced audio processing for singing synthesis
- backends
- Backend implementations for singing synthesis models (ONNX, etc.).
- cloud_deployment
- Cloud deployment and distributed synthesis infrastructure — Cloud Deployment and Distributed Synthesis
- composition_assistant
- AI-driven composition assistance for melody and harmony generation — AI-Driven Composition Assistance
- config
- Configuration for singing synthesis
- core
- Core singing engine implementation
- custom_score
- Custom optimized score format for VoiRS singing synthesis
- effects
- Audio effects for singing synthesis
- emotion_transfer
- Real-time emotion transfer for dynamic emotion manipulation — Real-Time Emotion Transfer System
- formats
- Musical format parsers
- gpu_acceleration
- GPU acceleration for neural singing synthesis.
- granular_synthesis
- Granular synthesis for special vocal effects
- harmony
- Multi-voice harmony synthesis and management
- historical_practice
- Historical Performance Practice for Singing Voice Synthesis
- llm_understanding
- LLM-based musical understanding and semantic analysis — LLM-based Musical Understanding
- models
- Advanced singing models with neural synthesis
- multimodal
- Multimodal singing synthesis with audio-visual synchronization — Multimodal Singing Synthesis: Audio-Visual Integration
- musical_intelligence
- Musical Intelligence Features
- perceptual_quality
- Perceptual Quality Testing Framework
- performance_optimization
- Auto-generated module structure
- performance_optimization_advanced
- Advanced performance optimization with Neural Architecture Search
- physical_modeling
- Physical modeling of the vocal tract for realistic singing synthesis
- pitch
- Pitch processing and generation for singing synthesis
- pitch_simd
- SIMD-optimized pitch processing for high-performance synthesis — SIMD-Optimized Pitch Processing
- precision_quality
- Precision Quality Metrics
- prelude
- Prelude module for convenient imports
- realtime
- Auto-generated module structure
- research_integration
- State-of-the-art research models integration
- rhythm
- Rhythm and timing processing
- scalability
- Scalability Enhancements
- score
- Musical score processing and representation
- score_rendering
- Musical score rendering for visual notation display
- streaming
- Advanced streaming synthesis with zero-copy pipeline (Phase 3) — Advanced Streaming Synthesis (Phase 3)
- styles
- Musical style implementations for singing synthesis
- synthesis
- Singing synthesis engine and processing
- techniques
- Singing techniques and vocal processing
- types
- Type definitions for singing synthesis
- utils
- Utility functions for singing synthesis
- vocal_effects
- Vocal effects for singing synthesis
- voice
- Voice management and voice banking
- voice_blending
- Voice blending and morphing system
- voice_conversion
- Multi-speaker voice conversion for singing voice synthesis
- zero_shot
- Zero-shot singing voice synthesis
Enums§
- Error
- Error types for singing synthesis
Type Aliases§
- Result
- Result type for singing operations