pub mod audio;
pub mod config;
pub mod glue;
pub mod model;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
pub use config::BundleConfig;
pub use model::{InferOpts, TinyModel};
pub use rlx_runtime::Device;
pub use rlx_inflect_nano::frontend;
/// A text-to-speech bundle opened from a directory on disk.
///
/// Built by `TinyTts::load_from_dir`, which parses the bundle's `config.json` and
/// constructs the model over the bundle's `onnx` subdirectory. The English text
/// frontend is loaded on first use rather than at construction time.
pub struct TinyTts {
/// Configuration parsed from the bundle's `config.json`.
cfg: BundleConfig,
/// Bundle root directory, as passed to `load_from_dir`.
dir: PathBuf,
/// Model constructed from `<dir>/onnx` and a clone of `cfg`.
model: model::TinyModel,
/// Write-once cache for the text frontend; filled by the first successful
/// `frontend()` call from `<dir>/frontend`.
frontend: std::sync::OnceLock<frontend::English>,
}
/// Audio produced by a synthesis call: raw samples plus their sample rate.
///
/// Derives `Debug`, `Clone` and `PartialEq` so callers can log, duplicate and
/// compare results. `Eq` is intentionally not derived because `f32` is only
/// partially ordered/comparable (NaN).
#[derive(Debug, Clone, PartialEq)]
pub struct Wav {
    /// Sample values as returned by the model's `synthesize` call.
    pub samples: Vec<f32>,
    /// Sample rate, copied from the bundle configuration's `sample_rate`.
    pub sample_rate: u32,
}
impl TinyTts {
    /// Opens the bundle rooted at `dir`.
    ///
    /// Reads and parses `config.json` from `dir`, then constructs the model over the
    /// `onnx` subdirectory with a clone of that configuration. The text frontend is
    /// not loaded here; it is loaded by the first call to [`TinyTts::frontend`].
    ///
    /// # Errors
    ///
    /// Returns an error if `config.json` cannot be read (with the path attached as
    /// context) or if `BundleConfig::from_json` fails on its contents.
    pub fn load_from_dir(dir: &Path) -> Result<Self> {
        let raw_config = std::fs::read_to_string(dir.join("config.json"))
            .with_context(|| format!("read {}/config.json", dir.display()))?;
        let parsed = BundleConfig::from_json(&raw_config)?;
        let onnx_dir = dir.join("onnx");
        Ok(Self {
            // The model receives its own copy; the parsed value itself is stored in `cfg`.
            model: model::TinyModel::new(onnx_dir, parsed.clone()),
            cfg: parsed,
            dir: dir.to_path_buf(),
            frontend: std::sync::OnceLock::new(),
        })
    }

    /// Returns the configuration parsed when the bundle was opened.
    pub fn config(&self) -> &BundleConfig {
        let Self { cfg, .. } = self;
        cfg
    }

    /// Returns the English text frontend, loading it from `<dir>/frontend` if no
    /// frontend has been cached yet.
    ///
    /// # Errors
    ///
    /// Returns the error from `frontend::English::load` if loading fails; nothing is
    /// cached in that case, so a later call attempts the load again.
    pub fn frontend(&self) -> Result<&frontend::English> {
        match self.frontend.get() {
            Some(cached) => Ok(cached),
            None => {
                let loaded = frontend::English::load(&self.dir.join("frontend"))?;
                // If the cell was filled in the meantime, the stored value is returned
                // and `loaded` is dropped unused.
                Ok(self.frontend.get_or_init(move || loaded))
            }
        }
    }

    /// Converts `text` into the three id sequences produced by the frontend, passing
    /// the bundle's `add_blank` setting through.
    ///
    /// [`TinyTts::synthesize_on`] feeds the tuple elements to the model as phone, tone
    /// and language ids, in that order.
    ///
    /// # Errors
    ///
    /// Fails if the frontend cannot be loaded or if its `text_to_ids` call fails.
    pub fn text_to_ids(&self, text: &str) -> Result<(Vec<i64>, Vec<i64>, Vec<i64>)> {
        let english = self.frontend()?;
        english.text_to_ids(text, self.cfg.add_blank)
    }

    /// Synthesizes `text` on the given `device` using the configuration's default
    /// speaker, returning the samples together with the configured sample rate.
    ///
    /// # Errors
    ///
    /// Fails if text conversion fails or if the model's `synthesize` call fails.
    pub fn synthesize_on(&self, text: &str, device: Device, opts: &InferOpts) -> Result<Wav> {
        let (phone_ids, tone_ids, lang_ids) = self.text_to_ids(text)?;
        let speaker = self.cfg.default_speaker();
        let samples = self.model.synthesize(
            device,
            &phone_ids,
            &tone_ids,
            &lang_ids,
            speaker,
            opts,
        )?;
        let sample_rate = self.cfg.sample_rate;
        Ok(Wav {
            samples,
            sample_rate,
        })
    }

    /// Synthesizes `text` on `Device::Cpu`; shorthand for [`TinyTts::synthesize_on`].
    pub fn synthesize(&self, text: &str, opts: &InferOpts) -> Result<Wav> {
        Self::synthesize_on(self, text, Device::Cpu, opts)
    }

    /// Picks a device to run on: the first of `Metal`, `Mlx`, `Gpu` (checked in that
    /// order) that `rlx_runtime::is_available` reports as available, otherwise `Cpu`.
    pub fn preferred_device() -> Device {
        for candidate in [Device::Metal, Device::Mlx, Device::Gpu] {
            if rlx_runtime::is_available(candidate) {
                return candidate;
            }
        }
        Device::Cpu
    }
}