use serde::Deserialize;
/// Configuration values for the speaker encoder, loadable through serde.
///
/// Every field carries a `#[serde(default = "...")]` attribute, so a key that is
/// missing from the deserialized input is filled in by the named `default_*`
/// function instead of producing a missing-field error.
///
/// NOTE(review): the field names (Res2Net scale, SE channels, attention channels)
/// suggest an ECAPA-TDNN style encoder — confirm against the model implementation.
///
/// NOTE(review): `enc_channels`, `enc_kernel_sizes` and `enc_dilations` all default
/// to five entries; presumably they must stay the same length (one entry per
/// layer), but nothing in this struct enforces that — verify where it is consumed.
#[derive(Debug, Clone, Deserialize)]
pub struct SpeakerEncoderConfig {
/// Falls back to `default_mel_dim()` (128) when absent.
/// Presumably the mel feature dimension of the input — TODO confirm.
#[serde(default = "default_mel_dim")]
pub mel_dim: usize,
/// Falls back to `default_enc_dim()` (1024) when absent.
/// Presumably the size of the produced speaker embedding — TODO confirm.
#[serde(default = "default_enc_dim")]
pub enc_dim: usize,
/// Falls back to `default_enc_channels()` (`[512, 512, 512, 512, 1536]`) when absent.
/// Presumably per-layer channel counts — TODO confirm.
#[serde(default = "default_enc_channels")]
pub enc_channels: Vec<usize>,
/// Falls back to `default_enc_kernel_sizes()` (`[5, 3, 3, 3, 1]`) when absent.
/// Presumably per-layer convolution kernel sizes — TODO confirm.
#[serde(default = "default_enc_kernel_sizes")]
pub enc_kernel_sizes: Vec<usize>,
/// Falls back to `default_enc_dilations()` (`[1, 2, 3, 4, 1]`) when absent.
/// Presumably per-layer convolution dilations — TODO confirm.
#[serde(default = "default_enc_dilations")]
pub enc_dilations: Vec<usize>,
/// Falls back to `default_enc_attention_channels()` (128) when absent.
#[serde(default = "default_enc_attention_channels")]
pub enc_attention_channels: usize,
/// Falls back to `default_enc_res2net_scale()` (8) when absent.
#[serde(default = "default_enc_res2net_scale")]
pub enc_res2net_scale: usize,
/// Falls back to `default_enc_se_channels()` (128) when absent.
#[serde(default = "default_enc_se_channels")]
pub enc_se_channels: usize,
/// Falls back to `default_sample_rate()` (24000) when absent.
/// Presumably the audio sample rate in Hz — TODO confirm the unit.
#[serde(default = "default_sample_rate")]
pub sample_rate: usize,
}
/// Fallback for the `mel_dim` config field when it is not supplied: 128.
fn default_mel_dim() -> usize {
    const MEL_DIM: usize = 128;
    MEL_DIM
}
/// Fallback for the `enc_dim` config field when it is not supplied: 1024.
fn default_enc_dim() -> usize {
    const ENC_DIM: usize = 1024;
    ENC_DIM
}
/// Fallback for the `enc_channels` config field when it is not supplied:
/// four entries of 512 followed by a single 1536.
fn default_enc_channels() -> Vec<usize> {
    Vec::from([512, 512, 512, 512, 1536])
}
/// Fallback for the `enc_kernel_sizes` config field when it is not supplied:
/// `[5, 3, 3, 3, 1]`.
fn default_enc_kernel_sizes() -> Vec<usize> {
    [5, 3, 3, 3, 1].to_vec()
}
/// Fallback for the `enc_dilations` config field when it is not supplied:
/// `[1, 2, 3, 4, 1]`.
fn default_enc_dilations() -> Vec<usize> {
    const DILATIONS: [usize; 5] = [1, 2, 3, 4, 1];
    DILATIONS.to_vec()
}
/// Fallback for the `enc_attention_channels` config field when it is not supplied: 128.
fn default_enc_attention_channels() -> usize {
    const ATTENTION_CHANNELS: usize = 128;
    ATTENTION_CHANNELS
}
/// Fallback for the `enc_res2net_scale` config field when it is not supplied: 8.
fn default_enc_res2net_scale() -> usize {
    const RES2NET_SCALE: usize = 8;
    RES2NET_SCALE
}
/// Fallback for the `enc_se_channels` config field when it is not supplied: 128.
fn default_enc_se_channels() -> usize {
    const SE_CHANNELS: usize = 128;
    SE_CHANNELS
}
/// Fallback for the `sample_rate` config field when it is not supplied: 24000.
fn default_sample_rate() -> usize {
    const SAMPLE_RATE: usize = 24_000;
    SAMPLE_RATE
}
impl Default for SpeakerEncoderConfig {
fn default() -> Self {
Self {
mel_dim: default_mel_dim(),
enc_dim: default_enc_dim(),
enc_channels: default_enc_channels(),
enc_kernel_sizes: default_enc_kernel_sizes(),
enc_dilations: default_enc_dilations(),
enc_attention_channels: default_enc_attention_channels(),
enc_res2net_scale: default_enc_res2net_scale(),
enc_se_channels: default_enc_se_channels(),
sample_rate: default_sample_rate(),
}
}
}