pub struct AudioBuffer { /* private fields */ }Expand description
Audio buffer containing synthesized speech
Implementations§
Source§impl AudioBuffer
impl AudioBuffer
Sourcepub fn new(samples: Vec<f32>, sample_rate: u32, channels: u32) -> AudioBuffer
pub fn new(samples: Vec<f32>, sample_rate: u32, channels: u32) -> AudioBuffer
Create new audio buffer
Sourcepub fn samples_mut(&mut self) -> &mut [f32]
pub fn samples_mut(&mut self) -> &mut [f32]
Get mutable access to samples
Sourcepub fn sample_rate(&self) -> u32
pub fn sample_rate(&self) -> u32
Get sample rate in Hz
Sourcepub fn metadata(&self) -> &AudioMetadata
pub fn metadata(&self) -> &AudioMetadata
Get audio metadata
Sourcepub fn silence(
duration_seconds: f32,
sample_rate: u32,
channels: u32,
) -> AudioBuffer
pub fn silence( duration_seconds: f32, sample_rate: u32, channels: u32, ) -> AudioBuffer
Create silent audio buffer
Sourcepub fn sine_wave(
frequency: f32,
duration_seconds: f32,
sample_rate: u32,
amplitude: f32,
) -> AudioBuffer
pub fn sine_wave( frequency: f32, duration_seconds: f32, sample_rate: u32, amplitude: f32, ) -> AudioBuffer
Create audio buffer with sine wave (for testing)
Sourcepub fn with_samples(&self, samples: Vec<f32>) -> AudioBuffer
pub fn with_samples(&self, samples: Vec<f32>) -> AudioBuffer
Create a new buffer with the same format but different samples
Sourcepub fn clone_format(&self) -> BufferFormat
pub fn clone_format(&self) -> BufferFormat
Clone the buffer format without samples
Sourcepub fn from_format(format: &BufferFormat, samples: Vec<f32>) -> AudioBuffer
pub fn from_format(format: &BufferFormat, samples: Vec<f32>) -> AudioBuffer
Create buffer from format and samples
Source§impl AudioBuffer
impl AudioBuffer
Sourcepub fn save_wav(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
pub fn save_wav(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
Save audio as WAV file
Sourcepub fn save_wav_f32(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
pub fn save_wav_f32(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
Save audio as 32-bit float WAV file
Sourcepub fn save(
&self,
path: impl AsRef<Path>,
format: AudioFormat,
) -> Result<(), VoirsError>
pub fn save( &self, path: impl AsRef<Path>, format: AudioFormat, ) -> Result<(), VoirsError>
Save audio in specified format
Sourcepub fn save_flac(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
pub fn save_flac(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
Save audio as FLAC file
Sourcepub fn save_mp3(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
pub fn save_mp3(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
Save audio as MP3 file
Sourcepub fn save_ogg(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
pub fn save_ogg(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
Save audio as OGG file
Sourcepub fn save_opus(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
pub fn save_opus(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>
Save audio as Opus file
Sourcepub fn play(&self) -> Result<(), VoirsError>
pub fn play(&self) -> Result<(), VoirsError>
Play audio through system speakers
Sourcepub fn play_with_callback<F>(&self, callback: F) -> Result<(), VoirsError>
pub fn play_with_callback<F>(&self, callback: F) -> Result<(), VoirsError>
Play audio with callback for progress updates
Sourcepub fn to_format(&self, format: AudioFormat) -> Result<Vec<u8>, VoirsError>
pub fn to_format(&self, format: AudioFormat) -> Result<Vec<u8>, VoirsError>
Convert to different format as bytes
Sourcepub fn to_wav_bytes(&self) -> Result<Vec<u8>, VoirsError>
pub fn to_wav_bytes(&self) -> Result<Vec<u8>, VoirsError>
Convert to WAV bytes
Sourcepub fn to_flac_bytes(&self) -> Result<Vec<u8>, VoirsError>
pub fn to_flac_bytes(&self) -> Result<Vec<u8>, VoirsError>
Convert to FLAC bytes
Sourcepub fn to_mp3_bytes(&self) -> Result<Vec<u8>, VoirsError>
pub fn to_mp3_bytes(&self) -> Result<Vec<u8>, VoirsError>
Convert to MP3 bytes
Sourcepub fn to_ogg_bytes(&self) -> Result<Vec<u8>, VoirsError>
pub fn to_ogg_bytes(&self) -> Result<Vec<u8>, VoirsError>
Convert to OGG bytes
Sourcepub fn to_opus_bytes(&self) -> Result<Vec<u8>, VoirsError>
pub fn to_opus_bytes(&self) -> Result<Vec<u8>, VoirsError>
Convert to Opus bytes
Sourcepub fn load_wav(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
pub fn load_wav(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
Load audio from WAV file
Sourcepub fn load(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
pub fn load(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
Load audio from file (auto-detect format)
Sourcepub fn load_flac(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
pub fn load_flac(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
Load audio from FLAC file
Sourcepub fn load_mp3(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
pub fn load_mp3(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
Load audio from MP3 file
Sourcepub fn load_ogg(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
pub fn load_ogg(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
Load audio from OGG file
Sourcepub fn load_opus(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
pub fn load_opus(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>
Load audio from Opus file
Sourcepub fn get_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
pub fn get_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
Get audio information without loading samples
Sourcepub fn get_wav_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
pub fn get_wav_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
Get WAV file information
Sourcepub fn get_flac_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
pub fn get_flac_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
Get FLAC file information
Sourcepub fn get_mp3_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
pub fn get_mp3_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
Get MP3 file information
Sourcepub fn get_ogg_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
pub fn get_ogg_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
Get OGG file information
Sourcepub fn get_opus_info(_path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
pub fn get_opus_info(_path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>
Get Opus file information
Sourcepub fn stream_to_callback<F>(
&self,
chunk_size: usize,
callback: F,
) -> Result<(), VoirsError>
pub fn stream_to_callback<F>( &self, chunk_size: usize, callback: F, ) -> Result<(), VoirsError>
Stream audio to callback function (for real-time processing)
Sourcepub fn export_metadata(&self) -> Result<String, VoirsError>
pub fn export_metadata(&self) -> Result<String, VoirsError>
Export audio metadata as JSON
Sourcepub fn from_raw_bytes(
bytes: &[u8],
sample_rate: u32,
channels: u32,
format: RawFormat,
) -> Result<AudioBuffer, VoirsError>
pub fn from_raw_bytes( bytes: &[u8], sample_rate: u32, channels: u32, format: RawFormat, ) -> Result<AudioBuffer, VoirsError>
Create audio buffer from raw bytes
Source§impl AudioBuffer
impl AudioBuffer
Sourcepub fn resample(&self, target_rate: u32) -> Result<AudioBuffer, VoirsError>
pub fn resample(&self, target_rate: u32) -> Result<AudioBuffer, VoirsError>
Convert to different sample rate
Sourcepub fn apply_gain(&mut self, gain_db: f32) -> Result<(), VoirsError>
pub fn apply_gain(&mut self, gain_db: f32) -> Result<(), VoirsError>
Apply gain to audio (in dB)
Uses SIMD acceleration for improved performance on large buffers.
Sourcepub fn normalize(&mut self, target_peak: f32) -> Result<(), VoirsError>
pub fn normalize(&mut self, target_peak: f32) -> Result<(), VoirsError>
Normalize audio to peak amplitude
Uses SIMD acceleration for improved performance on large buffers.
Sourcepub fn mix(&mut self, other: &AudioBuffer, gain: f32) -> Result<(), VoirsError>
pub fn mix(&mut self, other: &AudioBuffer, gain: f32) -> Result<(), VoirsError>
Mix with another audio buffer
Uses SIMD acceleration (FMA - fused multiply-add) for improved performance on large buffers.
Sourcepub fn append(&mut self, other: &AudioBuffer) -> Result<(), VoirsError>
pub fn append(&mut self, other: &AudioBuffer) -> Result<(), VoirsError>
Append another audio buffer
Sourcepub fn split(
&self,
time_seconds: f32,
) -> Result<(AudioBuffer, AudioBuffer), VoirsError>
pub fn split( &self, time_seconds: f32, ) -> Result<(AudioBuffer, AudioBuffer), VoirsError>
Split audio buffer at given time (in seconds)
Sourcepub fn fade_in(&mut self, duration_seconds: f32) -> Result<(), VoirsError>
pub fn fade_in(&mut self, duration_seconds: f32) -> Result<(), VoirsError>
Fade in over specified duration
Sourcepub fn fade_out(&mut self, duration_seconds: f32) -> Result<(), VoirsError>
pub fn fade_out(&mut self, duration_seconds: f32) -> Result<(), VoirsError>
Fade out over specified duration
Sourcepub fn crossfade(
&mut self,
other: &AudioBuffer,
crossfade_duration: f32,
) -> Result<(), VoirsError>
pub fn crossfade( &mut self, other: &AudioBuffer, crossfade_duration: f32, ) -> Result<(), VoirsError>
Apply cross-fade between two buffers
Sourcepub fn lowpass_filter(
&mut self,
cutoff_frequency: f32,
) -> Result<(), VoirsError>
pub fn lowpass_filter( &mut self, cutoff_frequency: f32, ) -> Result<(), VoirsError>
Apply a simple lowpass filter
Sourcepub fn highpass_filter(
&mut self,
cutoff_frequency: f32,
) -> Result<(), VoirsError>
pub fn highpass_filter( &mut self, cutoff_frequency: f32, ) -> Result<(), VoirsError>
Apply a simple highpass filter
Sourcepub fn time_stretch(
&self,
stretch_factor: f32,
) -> Result<AudioBuffer, VoirsError>
pub fn time_stretch( &self, stretch_factor: f32, ) -> Result<AudioBuffer, VoirsError>
Apply time stretching (simple pitch-preserving speed change)
Sourcepub fn pitch_shift(&self, semitones: f32) -> Result<AudioBuffer, VoirsError>
pub fn pitch_shift(&self, semitones: f32) -> Result<AudioBuffer, VoirsError>
Apply pitch shifting using phase vocoder algorithm
Sourcepub fn pitch_shift_psola(
&self,
semitones: f32,
) -> Result<AudioBuffer, VoirsError>
pub fn pitch_shift_psola( &self, semitones: f32, ) -> Result<AudioBuffer, VoirsError>
Apply pitch shifting using the PSOLA (Pitch Synchronous Overlap and Add) algorithm. This method is more suitable for speech and preserves formants better.
Sourcepub fn compress(
&mut self,
threshold: f32,
ratio: f32,
attack_ms: f32,
release_ms: f32,
) -> Result<(), VoirsError>
pub fn compress( &mut self, threshold: f32, ratio: f32, attack_ms: f32, release_ms: f32, ) -> Result<(), VoirsError>
Apply dynamic range compression
Sourcepub fn reverb(
&mut self,
room_size: f32,
damping: f32,
wet_level: f32,
) -> Result<(), VoirsError>
pub fn reverb( &mut self, room_size: f32, damping: f32, wet_level: f32, ) -> Result<(), VoirsError>
Apply reverb effect (simple delay-based reverb)
Sourcepub fn extract(
&self,
start_seconds: f32,
duration_seconds: f32,
) -> Result<AudioBuffer, VoirsError>
pub fn extract( &self, start_seconds: f32, duration_seconds: f32, ) -> Result<AudioBuffer, VoirsError>
Extract a portion of the audio buffer
Sourcepub fn rms(&self) -> f32
pub fn rms(&self) -> f32
Calculate RMS (Root Mean Square) value for loudness
Uses SIMD acceleration for improved performance on large buffers.
Sourcepub fn peak(&self) -> f32
pub fn peak(&self) -> f32
Calculate peak amplitude
Uses SIMD acceleration for improved performance on large buffers.
Sourcepub fn is_clipped(&self, threshold: f32) -> bool
pub fn is_clipped(&self, threshold: f32) -> bool
Check if audio contains clipping
Source§impl AudioBuffer
Audio buffer utilities
impl AudioBuffer
Audio buffer utilities
Sourcepub fn concatenate(buffers: &[AudioBuffer]) -> Result<AudioBuffer, VoirsError>
pub fn concatenate(buffers: &[AudioBuffer]) -> Result<AudioBuffer, VoirsError>
Concatenate multiple audio buffers
Buffers must have the same sample rate and channel configuration.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buf1 = AudioBuffer::mono(vec![1.0, 2.0, 3.0], 22050);
let buf2 = AudioBuffer::mono(vec![4.0, 5.0, 6.0], 22050);
let buf3 = AudioBuffer::mono(vec![7.0, 8.0, 9.0], 22050);
let concatenated = AudioBuffer::concatenate(&[buf1, buf2, buf3]).unwrap();
assert_eq!(concatenated.len(), 9);
Sourcepub fn pad(&mut self, before_seconds: f32, after_seconds: f32)
pub fn pad(&mut self, before_seconds: f32, after_seconds: f32)
Pad buffer with silence
Adds silence before and/or after the audio.
§Arguments
- before_seconds - Silence duration before audio
- after_seconds - Silence duration after audio
§Examples
use voirs_sdk::audio::AudioBuffer;
let mut buffer = AudioBuffer::mono(vec![1.0; 100], 22050);
buffer.pad(0.1, 0.2); // Add 0.1s before, 0.2s after
Sourcepub fn has_clipping(&self) -> bool
pub fn has_clipping(&self) -> bool
Check if buffer contains clipping (samples outside [-1.0, 1.0])
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5, 1.5, 0.3], 22050);
assert!(buffer.has_clipping());
Sourcepub fn count_clipped_samples(&self) -> usize
pub fn count_clipped_samples(&self) -> usize
Count number of clipped samples
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5, 1.5, -1.2, 0.3], 22050);
assert_eq!(buffer.count_clipped_samples(), 2);
Sourcepub fn rms_db(&self) -> f32
pub fn rms_db(&self) -> f32
Get RMS (Root Mean Square) level in dB
Returns -∞ for silence.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let rms_db = buffer.rms_db();
Sourcepub fn peak_db(&self) -> f32
pub fn peak_db(&self) -> f32
Get peak level in dB
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let peak_db = buffer.peak_db();
Sourcepub fn zero_crossing_rate(&self) -> f32
pub fn zero_crossing_rate(&self) -> f32
Calculate zero-crossing rate (ZCR)
ZCR is the rate at which the signal changes sign, useful for voice activity detection and audio classification.
§Returns
The zero-crossing rate as a fraction of the total samples.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5, -0.3, 0.2, -0.1, 0.4], 22050);
let zcr = buffer.zero_crossing_rate();
println!("Zero-crossing rate: {:.4}", zcr);
Sourcepub fn spectral_centroid(&self) -> f32
pub fn spectral_centroid(&self) -> f32
Calculate spectral centroid
The spectral centroid is the “center of mass” of the spectrum, indicating where the majority of the signal’s energy is concentrated. Higher values indicate brighter sounds.
§Returns
The spectral centroid in Hz, or 0.0 if calculation fails.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let centroid = buffer.spectral_centroid();
println!("Spectral centroid: {:.2} Hz", centroid);
Sourcepub fn spectral_rolloff(&self, threshold: f32) -> f32
pub fn spectral_rolloff(&self, threshold: f32) -> f32
Calculate spectral rolloff
The spectral rolloff is the frequency below which a specified percentage (typically 85%) of the total spectral energy is contained.
§Arguments
- threshold - The energy threshold (0.0 to 1.0), typically 0.85
§Returns
The rolloff frequency in Hz, or 0.0 if calculation fails.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let rolloff = buffer.spectral_rolloff(0.85);
println!("Spectral rolloff: {:.2} Hz", rolloff);
Sourcepub fn signal_to_noise_ratio(&self) -> f32
pub fn signal_to_noise_ratio(&self) -> f32
Calculate signal-to-noise ratio (SNR) in dB
Estimates SNR by comparing signal power to noise floor power. Uses a simple heuristic: assumes the quietest 10% of frames represent noise.
§Returns
SNR in dB, or 0.0 if calculation fails.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let snr = buffer.signal_to_noise_ratio();
println!("SNR: {:.2} dB", snr);
Sourcepub fn crest_factor(&self) -> f32
pub fn crest_factor(&self) -> f32
Calculate crest factor (peak-to-RMS ratio) in dB
Crest factor indicates the dynamic range of the audio. Higher values indicate more dynamic content with prominent peaks.
§Returns
Crest factor in dB.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let crest = buffer.crest_factor();
println!("Crest factor: {:.2} dB", crest);
Sourcepub fn detect_silence(
&self,
threshold_db: f32,
min_duration: f32,
) -> Vec<(f32, f32)>
pub fn detect_silence( &self, threshold_db: f32, min_duration: f32, ) -> Vec<(f32, f32)>
Detect silence segments in the audio
Returns a list of (start_time, end_time) tuples in seconds representing detected silence segments.
§Arguments
- threshold_db - Silence threshold in dB (e.g., -40.0)
- min_duration - Minimum silence duration in seconds
§Returns
Vector of (start, end) time pairs in seconds.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let silences = buffer.detect_silence(-40.0, 0.1);
for (start, end) in silences {
println!("Silence: {:.2}s to {:.2}s", start, end);
}
Sourcepub fn mfcc(
&self,
num_coeffs: usize,
num_filters: usize,
fft_size: usize,
) -> Vec<f32>
pub fn mfcc( &self, num_coeffs: usize, num_filters: usize, fft_size: usize, ) -> Vec<f32>
Calculate Mel-Frequency Cepstral Coefficients (MFCCs)
MFCCs are widely used in speech and audio processing for feature extraction. They represent the short-term power spectrum of a sound on a mel scale.
§Arguments
- num_coeffs - Number of MFCC coefficients to extract (typically 13)
- num_filters - Number of mel-scale filters (typically 26-40)
- fft_size - FFT size (power of 2, typically 512-2048)
§Returns
Vector of MFCC coefficients, or empty vector if calculation fails.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let mfccs = buffer.mfcc(13, 26, 512);
println!("MFCC coefficients: {:?}", mfccs);
Sourcepub fn detect_pitch_autocorr(&self, min_freq: f32, max_freq: f32) -> f32
pub fn detect_pitch_autocorr(&self, min_freq: f32, max_freq: f32) -> f32
Detect pitch using autocorrelation
Uses the autocorrelation method to estimate the fundamental frequency (F0) of the audio signal. This is more robust than simple zero-crossing rate.
§Arguments
- min_freq - Minimum expected frequency in Hz (e.g., 80 for male voice)
- max_freq - Maximum expected frequency in Hz (e.g., 400 for female voice)
§Returns
Estimated pitch in Hz, or 0.0 if no pitch detected.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let pitch = buffer.detect_pitch_autocorr(80.0, 400.0);
println!("Detected pitch: {:.2} Hz", pitch);
Sourcepub fn spectral_flux(
&self,
prev_buffer: Option<&AudioBuffer>,
fft_size: usize,
) -> f32
pub fn spectral_flux( &self, prev_buffer: Option<&AudioBuffer>, fft_size: usize, ) -> f32
Calculate spectral flux
Spectral flux measures the rate of change in the power spectrum, useful for detecting onsets and transients in audio.
§Arguments
- prev_buffer - Previous audio buffer for comparison (optional)
- fft_size - FFT size for spectral analysis
§Returns
Spectral flux value, or 0.0 if calculation fails.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer1 = AudioBuffer::mono(vec![0.5; 1024], 22050);
let buffer2 = AudioBuffer::mono(vec![0.6; 1024], 22050);
let flux = buffer2.spectral_flux(Some(&buffer1), 512);
println!("Spectral flux: {:.4}", flux);
Sourcepub fn estimate_formants(&self, num_formants: usize) -> Vec<f32>
pub fn estimate_formants(&self, num_formants: usize) -> Vec<f32>
Estimate formant frequencies
Formants are resonant frequencies of the vocal tract, crucial for vowel identification and speaker characteristics. This uses LPC (Linear Predictive Coding) analysis to estimate the first 4 formants.
§Arguments
- num_formants - Number of formants to estimate (typically 3-4)
§Returns
Vector of estimated formant frequencies in Hz.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let formants = buffer.estimate_formants(4);
println!("Formants: {:?} Hz", formants);
Sourcepub fn calculate_jitter(&self, min_freq: f32, max_freq: f32) -> f32
pub fn calculate_jitter(&self, min_freq: f32, max_freq: f32) -> f32
Calculate Jitter (pitch period irregularity)
Jitter measures the cycle-to-cycle variation in fundamental frequency (F0), expressed as a percentage. It’s a crucial indicator of voice quality and pathology. Higher jitter indicates more irregular vocal fold vibration.
This implements the Jitter (local) metric, which is the average absolute difference between consecutive periods, divided by the average period.
§Arguments
- min_freq - Minimum expected pitch (e.g., 75 Hz for male voice)
- max_freq - Maximum expected pitch (e.g., 500 Hz for female voice)
§Returns
Jitter percentage (0-100), or 0.0 if calculation fails. Typical values:
- < 1.0%: Normal voice quality
- 1.0-2.0%: Mild irregularity
- > 2.0%: Potential voice pathology
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 4096], 22050);
let jitter = buffer.calculate_jitter(75.0, 500.0);
println!("Jitter: {:.2}%", jitter);
Sourcepub fn calculate_shimmer(&self, min_freq: f32, max_freq: f32) -> f32
pub fn calculate_shimmer(&self, min_freq: f32, max_freq: f32) -> f32
Calculate Shimmer (amplitude variation)
Shimmer measures the cycle-to-cycle variation in amplitude, expressed as a percentage. It’s an important indicator of voice quality and vocal fold irregularities. Higher shimmer indicates unstable voice production.
This implements the Shimmer (local) metric, which is the average absolute difference between consecutive peak amplitudes, divided by the average amplitude.
§Arguments
- min_freq - Minimum expected pitch for period detection
- max_freq - Maximum expected pitch for period detection
§Returns
Shimmer percentage (0-100), or 0.0 if calculation fails. Typical values:
- < 3.0%: Normal voice quality
- 3.0-6.0%: Mild amplitude variation
- > 6.0%: Potential voice pathology
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 4096], 22050);
let shimmer = buffer.calculate_shimmer(75.0, 500.0);
println!("Shimmer: {:.2}%", shimmer);
Sourcepub fn calculate_hnr(&self, min_freq: f32, max_freq: f32) -> f32
pub fn calculate_hnr(&self, min_freq: f32, max_freq: f32) -> f32
Calculate Harmonic-to-Noise Ratio (HNR)
HNR measures the ratio of harmonic (periodic) to noise (aperiodic) energy in the voice signal. It’s a fundamental measure of voice quality. Higher HNR indicates clearer, more periodic voice production.
This uses autocorrelation-based method to separate harmonic and noise components.
§Arguments
- min_freq - Minimum expected pitch
- max_freq - Maximum expected pitch
§Returns
HNR in decibels (dB). Typical values:
- > 20 dB: Excellent voice quality
- 10-20 dB: Good voice quality
- 5-10 dB: Fair voice quality
- < 5 dB: Poor voice quality or pathology
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 4096], 22050);
let hnr = buffer.calculate_hnr(75.0, 500.0);
println!("HNR: {:.2} dB", hnr);
Sourcepub fn calculate_delta_mfcc(
mfcc_frames: &[Vec<f32>],
delta_window: usize,
) -> Vec<Vec<f32>>
pub fn calculate_delta_mfcc( mfcc_frames: &[Vec<f32>], delta_window: usize, ) -> Vec<Vec<f32>>
Calculate Delta MFCCs (first-order temporal derivatives)
Delta coefficients represent the rate of change of MFCCs over time, capturing dynamic spectral information. These are essential for improving speech recognition accuracy.
§Arguments
- mfcc_frames - Vector of MFCC coefficient vectors from consecutive frames
- delta_window - Number of frames to use for delta calculation (typically 2)
§Returns
Vector of delta MFCC vectors, one per input frame.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
// Extract MFCCs from multiple frames...
let mfcc_frames = vec![
buffer.mfcc(13, 26, 512),
buffer.mfcc(13, 26, 512),
buffer.mfcc(13, 26, 512),
];
let delta_mfccs = AudioBuffer::calculate_delta_mfcc(&mfcc_frames, 2);
Sourcepub fn calculate_delta_delta_mfcc(
delta_mfccs: &[Vec<f32>],
delta_window: usize,
) -> Vec<Vec<f32>>
pub fn calculate_delta_delta_mfcc( delta_mfccs: &[Vec<f32>], delta_window: usize, ) -> Vec<Vec<f32>>
Calculate Delta-Delta MFCCs (second-order temporal derivatives)
Delta-Delta (acceleration) coefficients represent the rate of change of Delta coefficients, capturing the dynamics of spectral dynamics. Combined with MFCCs and Deltas, they form a powerful feature set for ASR.
§Arguments
- delta_mfccs - Vector of delta MFCC coefficient vectors
- delta_window - Number of frames to use (typically 2)
§Returns
Vector of delta-delta MFCC vectors.
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let mfcc_frames = vec![
buffer.mfcc(13, 26, 512),
buffer.mfcc(13, 26, 512),
buffer.mfcc(13, 26, 512),
];
let delta_mfccs = AudioBuffer::calculate_delta_mfcc(&mfcc_frames, 2);
let delta_delta_mfccs = AudioBuffer::calculate_delta_delta_mfcc(&delta_mfccs, 2);
Sourcepub fn chroma_features(&self, fft_size: usize, ref_freq: f32) -> Vec<f32>
pub fn chroma_features(&self, fft_size: usize, ref_freq: f32) -> Vec<f32>
Calculate Chroma features (pitch class representation)
Chroma features, also known as pitch class profiles or chromagrams, represent the intensity of the 12 pitch classes (C, C#, D, …, B) regardless of octave. This is particularly useful for music information retrieval, chord recognition, and key detection.
The algorithm maps each frequency bin to one of 12 pitch classes using: pitch_class = 12 * log2(freq / ref_freq) mod 12
§Arguments
- fft_size - FFT size (must be power of 2, typically 2048 or 4096 for music)
- ref_freq - Reference frequency for A4 (default: 440.0 Hz)
§Returns
12-element vector representing energy in each pitch class (C=0, C#=1, …, B=11). Returns empty vector if insufficient samples.
§Applications
- Music information retrieval
- Chord recognition and key detection
- Cover song identification
- Music similarity analysis
- Tonal music analysis
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let chroma = buffer.chroma_features(2048, 440.0);
assert_eq!(chroma.len(), 12);
Sourcepub fn spectral_contrast(&self, fft_size: usize, num_bands: usize) -> Vec<f32>
pub fn spectral_contrast(&self, fft_size: usize, num_bands: usize) -> Vec<f32>
Calculate Spectral Contrast
Spectral contrast measures the difference between peaks and valleys in the spectrum across multiple frequency bands. This provides a robust timbre representation, particularly useful for music genre classification and audio texture analysis.
The spectrum is divided into sub-bands, and for each band:
- Peak: Mean of top 20% of magnitudes
- Valley: Mean of bottom 20% of magnitudes
- Contrast: Peak - Valley (in dB)
§Arguments
- fft_size - FFT size (must be power of 2, typically 2048)
- num_bands - Number of frequency bands (typically 6-8)
§Returns
Vector of contrast values (in dB) for each frequency band. Returns empty vector if insufficient samples.
§Applications
- Music genre classification
- Audio texture analysis
- Instrument recognition
- Timbre characterization
- Sound quality assessment
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let contrast = buffer.spectral_contrast(2048, 6);
assert_eq!(contrast.len(), 6);
Sourcepub fn detect_pitch_yin(
&self,
min_freq: f32,
max_freq: f32,
threshold: f32,
) -> f32
pub fn detect_pitch_yin( &self, min_freq: f32, max_freq: f32, threshold: f32, ) -> f32
Detect pitch using the YIN algorithm
The YIN algorithm is a robust pitch detection method that improves upon autocorrelation by using a cumulative mean normalized difference function. It provides more accurate pitch detection, especially for noisy signals.
YIN steps:
- Calculate difference function
- Apply cumulative mean normalization
- Find first minimum below threshold
- Apply parabolic interpolation for sub-sample accuracy
Reference: “YIN, a fundamental frequency estimator for speech and music” by Alain de Cheveigné and Hideki Kawahara (2002)
§Arguments
- min_freq - Minimum frequency to search (Hz)
- max_freq - Maximum frequency to search (Hz)
- threshold - Threshold for minimum detection (typically 0.1-0.2)
§Returns
Detected fundamental frequency in Hz, or 0.0 if no pitch detected.
§Applications
- High-accuracy pitch tracking
- Music transcription
- Singing voice analysis
- Instrument tuning
- Prosody analysis in speech
§Examples
use voirs_sdk::audio::AudioBuffer;
let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let pitch = buffer.detect_pitch_yin(80.0, 400.0, 0.15);
if pitch > 0.0 {
println!("Detected pitch: {:.1} Hz", pitch);
}
Trait Implementations§
Source§impl Clone for AudioBuffer
impl Clone for AudioBuffer
Source§fn clone(&self) -> AudioBuffer
fn clone(&self) -> AudioBuffer
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for AudioBuffer
impl Debug for AudioBuffer
Source§impl<'de> Deserialize<'de> for AudioBuffer
impl<'de> Deserialize<'de> for AudioBuffer
Source§fn deserialize<__D>(
__deserializer: __D,
) -> Result<AudioBuffer, <__D as Deserializer<'de>>::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(
__deserializer: __D,
) -> Result<AudioBuffer, <__D as Deserializer<'de>>::Error>where
__D: Deserializer<'de>,
Source§impl Serialize for AudioBuffer
impl Serialize for AudioBuffer
Source§fn serialize<__S>(
&self,
__serializer: __S,
) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>where
__S: Serializer,
fn serialize<__S>(
&self,
__serializer: __S,
) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>where
__S: Serializer,
Auto Trait Implementations§
impl Freeze for AudioBuffer
impl RefUnwindSafe for AudioBuffer
impl Send for AudioBuffer
impl Sync for AudioBuffer
impl Unpin for AudioBuffer
impl UnwindSafe for AudioBuffer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<S> FromSample<S> for S
impl<S> FromSample<S> for S
fn from_sample_(s: S) -> S
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<F, T> IntoSample<T> for F where
T: FromSample<F>,
impl<F, T> IntoSample<T> for Fwhere
T: FromSample<F>,
fn into_sample(self) -> T
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
Source§impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its
superset. Read more
Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.