Skip to main content

AudioBuffer

Struct AudioBuffer 

Source
pub struct AudioBuffer { /* private fields */ }
Expand description

Audio buffer containing synthesized speech

Implementations§

Source§

impl AudioBuffer

Source

pub fn new(samples: Vec<f32>, sample_rate: u32, channels: u32) -> AudioBuffer

Create new audio buffer

Source

pub fn mono(samples: Vec<f32>, sample_rate: u32) -> AudioBuffer

Create mono audio buffer

Source

pub fn stereo(samples: Vec<f32>, sample_rate: u32) -> AudioBuffer

Create stereo audio buffer

Source

pub fn samples(&self) -> &[f32]

Get audio samples as slice

Source

pub fn samples_mut(&mut self) -> &mut [f32]

Get mutable access to samples

Source

pub fn sample_rate(&self) -> u32

Get sample rate in Hz

Source

pub fn channels(&self) -> u32

Get number of channels

Source

pub fn duration(&self) -> f32

Get duration in seconds

Source

pub fn metadata(&self) -> &AudioMetadata

Get audio metadata

Source

pub fn len(&self) -> usize

Get number of samples

Source

pub fn is_empty(&self) -> bool

Check if buffer is empty

Source

pub fn silence( duration_seconds: f32, sample_rate: u32, channels: u32, ) -> AudioBuffer

Create silent audio buffer

Source

pub fn sine_wave( frequency: f32, duration_seconds: f32, sample_rate: u32, amplitude: f32, ) -> AudioBuffer

Create audio buffer with sine wave (for testing)

Source

pub fn with_samples(&self, samples: Vec<f32>) -> AudioBuffer

Create a new buffer with the same format but different samples

Source

pub fn clone_format(&self) -> BufferFormat

Clone the buffer format without samples

Source

pub fn from_format(format: &BufferFormat, samples: Vec<f32>) -> AudioBuffer

Create buffer from format and samples

Source§

impl AudioBuffer

Source

pub fn save_wav(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>

Save audio as WAV file

Source

pub fn save_wav_f32(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>

Save audio as 32-bit float WAV file

Source

pub fn save( &self, path: impl AsRef<Path>, format: AudioFormat, ) -> Result<(), VoirsError>

Save audio in specified format

Source

pub fn save_flac(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>

Save audio as FLAC file

Source

pub fn save_mp3(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>

Save audio as MP3 file

Source

pub fn save_ogg(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>

Save audio as OGG file

Source

pub fn save_opus(&self, path: impl AsRef<Path>) -> Result<(), VoirsError>

Save audio as Opus file

Source

pub fn play(&self) -> Result<(), VoirsError>

Play audio through system speakers

Source

pub fn play_with_callback<F>(&self, callback: F) -> Result<(), VoirsError>
where F: FnMut(f32) + Send + 'static,

Play audio with callback for progress updates

Source

pub fn to_format(&self, format: AudioFormat) -> Result<Vec<u8>, VoirsError>

Convert to different format as bytes

Source

pub fn to_wav_bytes(&self) -> Result<Vec<u8>, VoirsError>

Convert to WAV bytes

Source

pub fn to_flac_bytes(&self) -> Result<Vec<u8>, VoirsError>

Convert to FLAC bytes

Source

pub fn to_mp3_bytes(&self) -> Result<Vec<u8>, VoirsError>

Convert to MP3 bytes

Source

pub fn to_ogg_bytes(&self) -> Result<Vec<u8>, VoirsError>

Convert to OGG bytes

Source

pub fn to_opus_bytes(&self) -> Result<Vec<u8>, VoirsError>

Convert to Opus bytes

Source

pub fn load_wav(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>

Load audio from WAV file

Source

pub fn load(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>

Load audio from file (auto-detect format)

Source

pub fn load_flac(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>

Load audio from FLAC file

Source

pub fn load_mp3(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>

Load audio from MP3 file

Source

pub fn load_ogg(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>

Load audio from OGG file

Source

pub fn load_opus(path: impl AsRef<Path>) -> Result<AudioBuffer, VoirsError>

Load audio from Opus file

Source

pub fn get_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>

Get audio information without loading samples

Source

pub fn get_wav_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>

Get WAV file information

Source

pub fn get_flac_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>

Get FLAC file information

Source

pub fn get_mp3_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>

Get MP3 file information

Source

pub fn get_ogg_info(path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>

Get OGG file information

Source

pub fn get_opus_info(_path: impl AsRef<Path>) -> Result<AudioInfo, VoirsError>

Get Opus file information

Source

pub fn stream_to_callback<F>( &self, chunk_size: usize, callback: F, ) -> Result<(), VoirsError>
where F: FnMut(&[f32]) -> Result<(), VoirsError>,

Stream audio to callback function (for real-time processing)

Source

pub fn export_metadata(&self) -> Result<String, VoirsError>

Export audio metadata as JSON

Source

pub fn from_raw_bytes( bytes: &[u8], sample_rate: u32, channels: u32, format: RawFormat, ) -> Result<AudioBuffer, VoirsError>

Create audio buffer from raw bytes

Source§

impl AudioBuffer

Source

pub fn resample(&self, target_rate: u32) -> Result<AudioBuffer, VoirsError>

Convert to different sample rate

Source

pub fn apply_gain(&mut self, gain_db: f32) -> Result<(), VoirsError>

Apply gain to audio (in dB)

Uses SIMD acceleration for improved performance on large buffers.

Source

pub fn normalize(&mut self, target_peak: f32) -> Result<(), VoirsError>

Normalize audio to peak amplitude

Uses SIMD acceleration for improved performance on large buffers.

Source

pub fn mix(&mut self, other: &AudioBuffer, gain: f32) -> Result<(), VoirsError>

Mix with another audio buffer

Uses SIMD acceleration (FMA - fused multiply-add) for improved performance on large buffers.

Source

pub fn append(&mut self, other: &AudioBuffer) -> Result<(), VoirsError>

Append another audio buffer

Source

pub fn split( &self, time_seconds: f32, ) -> Result<(AudioBuffer, AudioBuffer), VoirsError>

Split audio buffer at given time (in seconds)

Source

pub fn fade_in(&mut self, duration_seconds: f32) -> Result<(), VoirsError>

Fade in over specified duration

Source

pub fn fade_out(&mut self, duration_seconds: f32) -> Result<(), VoirsError>

Fade out over specified duration

Source

pub fn crossfade( &mut self, other: &AudioBuffer, crossfade_duration: f32, ) -> Result<(), VoirsError>

Apply cross-fade between two buffers

Source

pub fn lowpass_filter( &mut self, cutoff_frequency: f32, ) -> Result<(), VoirsError>

Apply a simple lowpass filter

Source

pub fn highpass_filter( &mut self, cutoff_frequency: f32, ) -> Result<(), VoirsError>

Apply a simple highpass filter

Source

pub fn time_stretch( &self, stretch_factor: f32, ) -> Result<AudioBuffer, VoirsError>

Apply time stretching (simple pitch-preserving speed change)

Source

pub fn pitch_shift(&self, semitones: f32) -> Result<AudioBuffer, VoirsError>

Apply pitch shifting using phase vocoder algorithm

Source

pub fn pitch_shift_psola( &self, semitones: f32, ) -> Result<AudioBuffer, VoirsError>

Apply pitch shifting using the PSOLA (Pitch Synchronous Overlap and Add) algorithm. This method is more suitable for speech and preserves formants better.

Source

pub fn compress( &mut self, threshold: f32, ratio: f32, attack_ms: f32, release_ms: f32, ) -> Result<(), VoirsError>

Apply dynamic range compression

Source

pub fn reverb( &mut self, room_size: f32, damping: f32, wet_level: f32, ) -> Result<(), VoirsError>

Apply reverb effect (simple delay-based reverb)

Source

pub fn extract( &self, start_seconds: f32, duration_seconds: f32, ) -> Result<AudioBuffer, VoirsError>

Extract a portion of the audio buffer

Source

pub fn rms(&self) -> f32

Calculate RMS (Root Mean Square) value for loudness

Uses SIMD acceleration for improved performance on large buffers.

Source

pub fn peak(&self) -> f32

Calculate peak amplitude

Uses SIMD acceleration for improved performance on large buffers.

Source

pub fn is_clipped(&self, threshold: f32) -> bool

Check if audio contains clipping

Source

pub fn soft_clip(&mut self, threshold: f32) -> Result<(), VoirsError>

Apply soft clipping to prevent harsh distortion

Source§

impl AudioBuffer

Audio buffer utilities

Source

pub fn concatenate(buffers: &[AudioBuffer]) -> Result<AudioBuffer, VoirsError>

Concatenate multiple audio buffers

Buffers must have the same sample rate and channel configuration.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buf1 = AudioBuffer::mono(vec![1.0, 2.0, 3.0], 22050);
let buf2 = AudioBuffer::mono(vec![4.0, 5.0, 6.0], 22050);
let buf3 = AudioBuffer::mono(vec![7.0, 8.0, 9.0], 22050);

let concatenated = AudioBuffer::concatenate(&[buf1, buf2, buf3]).unwrap();
assert_eq!(concatenated.len(), 9);
Source

pub fn pad(&mut self, before_seconds: f32, after_seconds: f32)

Pad buffer with silence

Adds silence before and/or after the audio.

§Arguments
  • before_seconds - Silence duration before audio
  • after_seconds - Silence duration after audio
§Examples
use voirs_sdk::audio::AudioBuffer;

let mut buffer = AudioBuffer::mono(vec![1.0; 100], 22050);
buffer.pad(0.1, 0.2); // Add 0.1s before, 0.2s after
Source

pub fn has_clipping(&self) -> bool

Check if buffer contains clipping (samples outside [-1.0, 1.0])

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5, 1.5, 0.3], 22050);
assert!(buffer.has_clipping());
Source

pub fn count_clipped_samples(&self) -> usize

Count number of clipped samples

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5, 1.5, -1.2, 0.3], 22050);
assert_eq!(buffer.count_clipped_samples(), 2);
Source

pub fn rms_db(&self) -> f32

Get RMS (Root Mean Square) level in dB

Returns -∞ for silence.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let rms_db = buffer.rms_db();
Source

pub fn peak_db(&self) -> f32

Get peak level in dB

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let peak_db = buffer.peak_db();
Source

pub fn zero_crossing_rate(&self) -> f32

Calculate zero-crossing rate (ZCR)

ZCR is the rate at which the signal changes sign, useful for voice activity detection and audio classification.

§Returns

The zero-crossing rate as a fraction of the total samples.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5, -0.3, 0.2, -0.1, 0.4], 22050);
let zcr = buffer.zero_crossing_rate();
println!("Zero-crossing rate: {:.4}", zcr);
Source

pub fn spectral_centroid(&self) -> f32

Calculate spectral centroid

The spectral centroid is the “center of mass” of the spectrum, indicating where the majority of the signal’s energy is concentrated. Higher values indicate brighter sounds.

§Returns

The spectral centroid in Hz, or 0.0 if calculation fails.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let centroid = buffer.spectral_centroid();
println!("Spectral centroid: {:.2} Hz", centroid);
Source

pub fn spectral_rolloff(&self, threshold: f32) -> f32

Calculate spectral rolloff

The spectral rolloff is the frequency below which a specified percentage (typically 85%) of the total spectral energy is contained.

§Arguments
  • threshold - The energy threshold (0.0 to 1.0), typically 0.85
§Returns

The rolloff frequency in Hz, or 0.0 if calculation fails.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let rolloff = buffer.spectral_rolloff(0.85);
println!("Spectral rolloff: {:.2} Hz", rolloff);
Source

pub fn signal_to_noise_ratio(&self) -> f32

Calculate signal-to-noise ratio (SNR) in dB

Estimates SNR by comparing signal power to noise floor power. Uses a simple heuristic: assumes the quietest 10% of frames represent noise.

§Returns

SNR in dB, or 0.0 if calculation fails.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let snr = buffer.signal_to_noise_ratio();
println!("SNR: {:.2} dB", snr);
Source

pub fn crest_factor(&self) -> f32

Calculate crest factor (peak-to-RMS ratio) in dB

Crest factor indicates the dynamic range of the audio. Higher values indicate more dynamic content with prominent peaks.

§Returns

Crest factor in dB.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let crest = buffer.crest_factor();
println!("Crest factor: {:.2} dB", crest);
Source

pub fn detect_silence( &self, threshold_db: f32, min_duration: f32, ) -> Vec<(f32, f32)>

Detect silence segments in the audio

Returns a list of (start_time, end_time) tuples in seconds representing detected silence segments.

§Arguments
  • threshold_db - Silence threshold in dB (e.g., -40.0)
  • min_duration - Minimum silence duration in seconds
§Returns

Vector of (start, end) time pairs in seconds.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1000], 22050);
let silences = buffer.detect_silence(-40.0, 0.1);
for (start, end) in silences {
    println!("Silence: {:.2}s to {:.2}s", start, end);
}
Source

pub fn mfcc( &self, num_coeffs: usize, num_filters: usize, fft_size: usize, ) -> Vec<f32>

Calculate Mel-Frequency Cepstral Coefficients (MFCCs)

MFCCs are widely used in speech and audio processing for feature extraction. They represent the short-term power spectrum of a sound on a mel scale.

§Arguments
  • num_coeffs - Number of MFCC coefficients to extract (typically 13)
  • num_filters - Number of mel-scale filters (typically 26-40)
  • fft_size - FFT size (power of 2, typically 512-2048)
§Returns

Vector of MFCC coefficients, or empty vector if calculation fails.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let mfccs = buffer.mfcc(13, 26, 512);
println!("MFCC coefficients: {:?}", mfccs);
Source

pub fn detect_pitch_autocorr(&self, min_freq: f32, max_freq: f32) -> f32

Detect pitch using autocorrelation

Uses the autocorrelation method to estimate the fundamental frequency (F0) of the audio signal. This is more robust than simple zero-crossing rate.

§Arguments
  • min_freq - Minimum expected frequency in Hz (e.g., 80 for male voice)
  • max_freq - Maximum expected frequency in Hz (e.g., 400 for female voice)
§Returns

Estimated pitch in Hz, or 0.0 if no pitch detected.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let pitch = buffer.detect_pitch_autocorr(80.0, 400.0);
println!("Detected pitch: {:.2} Hz", pitch);
Source

pub fn spectral_flux( &self, prev_buffer: Option<&AudioBuffer>, fft_size: usize, ) -> f32

Calculate spectral flux

Spectral flux measures the rate of change in the power spectrum, useful for detecting onsets and transients in audio.

§Arguments
  • prev_buffer - Previous audio buffer for comparison (optional)
  • fft_size - FFT size for spectral analysis
§Returns

Spectral flux value, or 0.0 if calculation fails.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer1 = AudioBuffer::mono(vec![0.5; 1024], 22050);
let buffer2 = AudioBuffer::mono(vec![0.6; 1024], 22050);
let flux = buffer2.spectral_flux(Some(&buffer1), 512);
println!("Spectral flux: {:.4}", flux);
Source

pub fn estimate_formants(&self, num_formants: usize) -> Vec<f32>

Estimate formant frequencies

Formants are resonant frequencies of the vocal tract, crucial for vowel identification and speaker characteristics. This uses LPC (Linear Predictive Coding) analysis to estimate the first 4 formants.

§Arguments
  • num_formants - Number of formants to estimate (typically 3-4)
§Returns

Vector of estimated formant frequencies in Hz.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 1024], 22050);
let formants = buffer.estimate_formants(4);
println!("Formants: {:?} Hz", formants);
Source

pub fn calculate_jitter(&self, min_freq: f32, max_freq: f32) -> f32

Calculate Jitter (pitch period irregularity)

Jitter measures the cycle-to-cycle variation in fundamental frequency (F0), expressed as a percentage. It’s a crucial indicator of voice quality and pathology. Higher jitter indicates more irregular vocal fold vibration.

This implements the Jitter (local) metric, which is the average absolute difference between consecutive periods, divided by the average period.

§Arguments
  • min_freq - Minimum expected pitch (e.g., 75 Hz for male voice)
  • max_freq - Maximum expected pitch (e.g., 500 Hz for female voice)
§Returns

Jitter percentage (0-100), or 0.0 if calculation fails. Typical values:

  • < 1.0%: Normal voice quality
  • 1.0-2.0%: Mild irregularity
  • > 2.0%: Potential voice pathology

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 4096], 22050);
let jitter = buffer.calculate_jitter(75.0, 500.0);
println!("Jitter: {:.2}%", jitter);
Source

pub fn calculate_shimmer(&self, min_freq: f32, max_freq: f32) -> f32

Calculate Shimmer (amplitude variation)

Shimmer measures the cycle-to-cycle variation in amplitude, expressed as a percentage. It’s an important indicator of voice quality and vocal fold irregularities. Higher shimmer indicates unstable voice production.

This implements the Shimmer (local) metric, which is the average absolute difference between consecutive peak amplitudes, divided by the average amplitude.

§Arguments
  • min_freq - Minimum expected pitch for period detection
  • max_freq - Maximum expected pitch for period detection
§Returns

Shimmer percentage (0-100), or 0.0 if calculation fails. Typical values:

  • < 3.0%: Normal voice quality
  • 3.0-6.0%: Mild amplitude variation
  • > 6.0%: Potential voice pathology

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 4096], 22050);
let shimmer = buffer.calculate_shimmer(75.0, 500.0);
println!("Shimmer: {:.2}%", shimmer);
Source

pub fn calculate_hnr(&self, min_freq: f32, max_freq: f32) -> f32

Calculate Harmonic-to-Noise Ratio (HNR)

HNR measures the ratio of harmonic (periodic) to noise (aperiodic) energy in the voice signal. It’s a fundamental measure of voice quality. Higher HNR indicates clearer, more periodic voice production.

This uses an autocorrelation-based method to separate harmonic and noise components.

§Arguments
  • min_freq - Minimum expected pitch
  • max_freq - Maximum expected pitch
§Returns

HNR in decibels (dB). Typical values:

  • > 20 dB: Excellent voice quality

  • 10-20 dB: Good voice quality
  • 5-10 dB: Fair voice quality
  • < 5 dB: Poor voice quality or pathology
§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 4096], 22050);
let hnr = buffer.calculate_hnr(75.0, 500.0);
println!("HNR: {:.2} dB", hnr);
Source

pub fn calculate_delta_mfcc( mfcc_frames: &[Vec<f32>], delta_window: usize, ) -> Vec<Vec<f32>>

Calculate Delta MFCCs (first-order temporal derivatives)

Delta coefficients represent the rate of change of MFCCs over time, capturing dynamic spectral information. These are essential for improving speech recognition accuracy.

§Arguments
  • mfcc_frames - Vector of MFCC coefficient vectors from consecutive frames
  • delta_window - Number of frames to use for delta calculation (typically 2)
§Returns

Vector of delta MFCC vectors, one per input frame.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
// Extract MFCCs from multiple frames...
let mfcc_frames = vec![
    buffer.mfcc(13, 26, 512),
    buffer.mfcc(13, 26, 512),
    buffer.mfcc(13, 26, 512),
];
let delta_mfccs = AudioBuffer::calculate_delta_mfcc(&mfcc_frames, 2);
Source

pub fn calculate_delta_delta_mfcc( delta_mfccs: &[Vec<f32>], delta_window: usize, ) -> Vec<Vec<f32>>

Calculate Delta-Delta MFCCs (second-order temporal derivatives)

Delta-Delta (acceleration) coefficients represent the rate of change of Delta coefficients, capturing how the spectral dynamics themselves change over time. Combined with MFCCs and Deltas, they form a powerful feature set for automatic speech recognition (ASR).

§Arguments
  • delta_mfccs - Vector of delta MFCC coefficient vectors
  • delta_window - Number of frames to use (typically 2)
§Returns

Vector of delta-delta MFCC vectors.

§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let mfcc_frames = vec![
    buffer.mfcc(13, 26, 512),
    buffer.mfcc(13, 26, 512),
    buffer.mfcc(13, 26, 512),
];
let delta_mfccs = AudioBuffer::calculate_delta_mfcc(&mfcc_frames, 2);
let delta_delta_mfccs = AudioBuffer::calculate_delta_delta_mfcc(&delta_mfccs, 2);
Source

pub fn chroma_features(&self, fft_size: usize, ref_freq: f32) -> Vec<f32>

Calculate Chroma features (pitch class representation)

Chroma features, also known as pitch class profiles or chromagrams, represent the intensity of the 12 pitch classes (C, C#, D, …, B) regardless of octave. This is particularly useful for music information retrieval, chord recognition, and key detection.

The algorithm maps each frequency bin to one of 12 pitch classes using: pitch_class = 12 * log2(freq / ref_freq) mod 12

§Arguments
  • fft_size - FFT size (must be power of 2, typically 2048 or 4096 for music)
  • ref_freq - Reference frequency for A4 (standard tuning: 440.0 Hz)
§Returns

12-element vector representing energy in each pitch class (C=0, C#=1, …, B=11). Returns empty vector if insufficient samples.

§Applications
  • Music information retrieval
  • Chord recognition and key detection
  • Cover song identification
  • Music similarity analysis
  • Tonal music analysis
§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let chroma = buffer.chroma_features(2048, 440.0);
assert_eq!(chroma.len(), 12);
Source

pub fn spectral_contrast(&self, fft_size: usize, num_bands: usize) -> Vec<f32>

Calculate Spectral Contrast

Spectral contrast measures the difference between peaks and valleys in the spectrum across multiple frequency bands. This provides a robust timbre representation, particularly useful for music genre classification and audio texture analysis.

The spectrum is divided into sub-bands, and for each band:

  • Peak: Mean of top 20% of magnitudes
  • Valley: Mean of bottom 20% of magnitudes
  • Contrast: Peak - Valley (in dB)
§Arguments
  • fft_size - FFT size (must be power of 2, typically 2048)
  • num_bands - Number of frequency bands (typically 6-8)
§Returns

Vector of contrast values (in dB) for each frequency band. Returns empty vector if insufficient samples.

§Applications
  • Music genre classification
  • Audio texture analysis
  • Instrument recognition
  • Timbre characterization
  • Sound quality assessment
§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let contrast = buffer.spectral_contrast(2048, 6);
assert_eq!(contrast.len(), 6);
Source

pub fn detect_pitch_yin( &self, min_freq: f32, max_freq: f32, threshold: f32, ) -> f32

Detect pitch using the YIN algorithm

The YIN algorithm is a robust pitch detection method that improves upon autocorrelation by using a cumulative mean normalized difference function. It provides more accurate pitch detection, especially for noisy signals.

YIN steps:

  1. Calculate difference function
  2. Apply cumulative mean normalization
  3. Find first minimum below threshold
  4. Apply parabolic interpolation for sub-sample accuracy

Reference: “YIN, a fundamental frequency estimator for speech and music” by Alain de Cheveigné and Hideki Kawahara (2002)

§Arguments
  • min_freq - Minimum frequency to search (Hz)
  • max_freq - Maximum frequency to search (Hz)
  • threshold - Threshold for minimum detection (typically 0.1-0.2)
§Returns

Detected fundamental frequency in Hz, or 0.0 if no pitch detected.

§Applications
  • High-accuracy pitch tracking
  • Music transcription
  • Singing voice analysis
  • Instrument tuning
  • Prosody analysis in speech
§Examples
use voirs_sdk::audio::AudioBuffer;

let buffer = AudioBuffer::mono(vec![0.5; 8192], 22050);
let pitch = buffer.detect_pitch_yin(80.0, 400.0, 0.15);
if pitch > 0.0 {
    println!("Detected pitch: {:.1} Hz", pitch);
}

Trait Implementations§

Source§

impl Clone for AudioBuffer

Source§

fn clone(&self) -> AudioBuffer

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for AudioBuffer

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
Source§

impl<'de> Deserialize<'de> for AudioBuffer

Source§

fn deserialize<__D>( __deserializer: __D, ) -> Result<AudioBuffer, <__D as Deserializer<'de>>::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl Serialize for AudioBuffer

Source§

fn serialize<__S>( &self, __serializer: __S, ) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<S> FromSample<S> for S

Source§

fn from_sample_(s: S) -> S

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<F, T> IntoSample<T> for F
where T: FromSample<F>,

Source§

fn into_sample(self) -> T

Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

Source§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).
Source§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> ToSample<U> for T
where U: FromSample<T>,

Source§

fn to_sample_(self) -> U

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

Source§

impl<S, T> Duplex<S> for T
where T: FromSample<S> + ToSample<S>,

Source§

impl<T> ErasedDestructor for T
where T: 'static,