use std::path::{Path, PathBuf};
/// Entry point of the `basic` example.
///
/// Parses the command line, resolves the reference transcript, loads the
/// models via `neutts::download::load_from_hub_cb`, synthesises `--text`
/// conditioned on the reference codes and transcript, and writes the result
/// as a WAV file.
///
/// # Errors
///
/// Returns an error when:
/// - a flag that takes a value is the last argument on the command line,
/// - the reference-text file exists but cannot be read,
/// - the resolved reference text is empty,
/// - the reference codes file does not exist,
/// - model loading, reference-code loading, inference, or WAV writing fails.
///
/// An unknown argument prints a message to stderr and exits with status 1.
fn main() -> anyhow::Result<()> {
    /// Returns the value that follows `flag`, or an error if the command
    /// line ends before one is supplied.
    fn take_value(
        args: &mut impl Iterator<Item = String>,
        flag: &str,
    ) -> anyhow::Result<String> {
        args.next()
            .ok_or_else(|| anyhow::anyhow!("Missing value for {} (use --help for usage)", flag))
    }

    let mut args = std::env::args().skip(1);
    let mut backbone = "neuphonic/neutts-nano-q4-gguf".to_string();
    let mut text = "Hello from Rust! NeuTTS brings voice cloning to your local device.".to_string();
    let mut ref_codes_path = PathBuf::from("samples/jo.npy");
    let mut ref_text_path = PathBuf::from("samples/jo.txt");
    // Tracks whether --ref-text was supplied, so a missing *default* transcript
    // file is not mistaken for literal transcript text.
    let mut ref_text_given = false;
    let mut output = "output.wav".to_string();

    // `while let` rather than `for`: value-taking flags pull their argument
    // from the same iterator inside the loop body.
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "--backbone" => backbone = take_value(&mut args, &arg)?,
            "--text" => text = take_value(&mut args, &arg)?,
            "--ref-codes" => ref_codes_path = PathBuf::from(take_value(&mut args, &arg)?),
            "--ref-audio" => {
                // The codes file is expected next to the audio, same stem, `.npy` extension.
                let audio = take_value(&mut args, &arg)?;
                ref_codes_path = Path::new(&audio).with_extension("npy");
            }
            "--ref-text" => {
                ref_text_path = PathBuf::from(take_value(&mut args, &arg)?);
                ref_text_given = true;
            }
            "--output" | "--out" => output = take_value(&mut args, &arg)?,
            "--help" | "-h" => {
                print_help();
                return Ok(());
            }
            other => {
                eprintln!("Unknown argument: {other} (use --help for usage)");
                std::process::exit(1);
            }
        }
    }

    #[cfg(feature = "espeak")]
    if !neutts::phonemize::is_espeak_available("en-us") {
        eprintln!(
            "WARNING: espeak-ng not found.\n\
             Install: brew install espeak-ng (macOS)\n\
             Or: apt install espeak-ng (Debian/Ubuntu)\n\
             Or: apk add espeak-ng (Alpine)"
        );
    }

    // --ref-text accepts either a path or the transcript itself: an existing
    // path is read from disk, anything else the user typed is taken verbatim.
    let ref_text = if ref_text_path.exists() {
        std::fs::read_to_string(&ref_text_path)
            .map_err(|e| {
                anyhow::anyhow!(
                    "Failed to read reference text {}: {}",
                    ref_text_path.display(),
                    e
                )
            })?
            .trim()
            .to_string()
    } else if ref_text_given {
        ref_text_path.to_string_lossy().into_owned()
    } else {
        // The default transcript file is missing; fall through to the error below.
        String::new()
    };
    if ref_text.is_empty() {
        anyhow::bail!(
            "Reference text is empty. Provide --ref-text <PATH|TEXT> or create {}",
            ref_text_path.display()
        );
    }

    println!("Backbone : {backbone}");
    println!("Codec : Burn {}", burn_backend_label());
    println!("Text : {text:?}");
    println!("Ref codes : {}", ref_codes_path.display());
    println!("Ref text : {ref_text:?}");
    println!("Output : {output}");
    println!();

    // Validate the reference codes path before loading the models so that a
    // bad path is reported immediately.
    if !ref_codes_path.exists() {
        // Best-effort listing of `.npy` files under `samples/`; a missing or
        // unreadable directory simply yields no hint.
        let available: Vec<String> = std::fs::read_dir("samples")
            .into_iter()
            .flatten()
            .flatten()
            .filter_map(|e| {
                let p = e.path();
                if p.extension().and_then(|x| x.to_str()) == Some("npy") {
                    Some(format!(" samples/{}", p.file_name()?.to_string_lossy()))
                } else {
                    None
                }
            })
            .collect();
        let hint = if available.is_empty() {
            String::new()
        } else {
            format!("\n\nAvailable samples:\n{}", available.join("\n"))
        };
        anyhow::bail!(
            "Reference codes file not found: {}{}\n\
             \nTo generate your own, run:\n\
             \n\
             \tcargo run --example encode_reference -- \\\n\
             \t --audio reference.wav --out samples/my_voice.npy\n\
             \n\
             Or use the Python helper:\n\
             \n\
             \tfrom neutts import NeuTTS; import numpy as np\n\
             \ttts = NeuTTS(codec_repo='neuphonic/neucodec')\n\
             \tnp.save('ref.npy', tts.encode_reference('ref.wav').numpy().astype('int32'))\n",
            ref_codes_path.display(),
            hint,
        );
    }

    println!("Loading models…");
    let tts = neutts::download::load_from_hub_cb(&backbone, None, |p| {
        use neutts::download::LoadProgress;
        match &p {
            LoadProgress::Fetching { step, total, file, repo } => {
                println!(" [{step}/{total}] Fetching {file} from {repo}…")
            }
            LoadProgress::Loading { step, total, component } => {
                println!(" [{step}/{total}] Loading {component}…")
            }
        }
    })?;
    println!(" → codec backend : {}", tts.codec.backend_name());
    println!();

    println!("Loading reference codes from {}…", ref_codes_path.display());
    let ref_codes = tts.load_ref_codes(&ref_codes_path)?;
    // The printed duration divides the token count by 50, i.e. it assumes
    // 50 codec tokens per second of reference audio.
    println!(
        " → {} codec tokens (~{:.1} s of reference audio)",
        ref_codes.len(),
        ref_codes.len() as f32 / 50.0
    );

    println!("\nSynthesising…");
    let audio = tts.infer(&text, &ref_codes, &ref_text)?;
    println!(
        " → {} samples ({:.2} s at {} Hz)",
        audio.len(),
        audio.len() as f32 / neutts::SAMPLE_RATE as f32,
        neutts::SAMPLE_RATE,
    );
    tts.write_wav(&audio, Path::new(&output))?;
    println!("\nDone → {output}");
    Ok(())
}
/// Returns a human-readable label for the codec backend selected at compile
/// time: the wgpu label when the `wgpu` feature is enabled, otherwise the
/// CPU-only ndarray label.
fn burn_backend_label() -> &'static str {
    // Guard clause for builds compiled without the `wgpu` feature.
    if !cfg!(feature = "wgpu") {
        return "ndarray (CPU)";
    }
    "wgpu → ndarray fallback"
}
/// Prints the usage text for the `basic` example to stdout.
fn print_help() {
    // One entry per output line; empty entries are the blank separator lines.
    const HELP_LINES: &[&str] = &[
        "basic — download backbone + synthesise speech with voice cloning",
        "",
        "USAGE:",
        "\tcargo run --example basic --features espeak -- [OPTIONS]",
        "",
        "OPTIONS:",
        "\t--backbone REPO HuggingFace backbone repo",
        "\t (default: neuphonic/neutts-nano-q4-gguf)",
        "\t--text TEXT Text to synthesise",
        "\t--ref-codes PATH Pre-encoded reference codes (.npy)",
        "\t--ref-audio PATH Reference audio (.wav) — derives .npy from same stem",
        "\t--ref-text PATH Transcript of the reference recording (file or string)",
        "\t--output/--out PATH Output WAV (default: output.wav)",
        "\t--help / -h Show this help",
        "",
        "BACKEND:",
        "\twgpu is the default; falls back to ndarray automatically.",
        "\tForce CPU-only: cargo run --example basic --no-default-features --features espeak",
        "",
        "SETUP (one-time, before first run):",
        "\tcargo run --example download_models # fetch NeuCodec ONNX",
        "\tcargo build # embed weights into binary",
    ];
    println!("{}", HELP_LINES.join("\n"));
}