use std::path::PathBuf;
fn main() -> anyhow::Result<()> {
let mut args = std::env::args().skip(1).peekable();
let mut encoder_bpk: Option<PathBuf> = None; let mut audio_path: Option<PathBuf> = None;
let mut out_path: Option<PathBuf> = None;
while let Some(arg) = args.next() {
match arg.as_str() {
"--encoder" => { if let Some(v) = args.next() { encoder_bpk = Some(PathBuf::from(v)); } }
"--audio" | "-i" => { if let Some(v) = args.next() { audio_path = Some(PathBuf::from(v)); } }
"--out" | "-o" => { if let Some(v) = args.next() { out_path = Some(PathBuf::from(v)); } }
"--help" | "-h" => { print_help(); return Ok(()); }
other => {
eprintln!("Unknown argument: {other} (use --help for usage)");
std::process::exit(1);
}
}
}
let audio_path = audio_path.ok_or_else(|| {
anyhow::anyhow!("No audio file specified. Use --audio <path.wav> (--help for more)")
})?;
if !audio_path.exists() {
anyhow::bail!("Audio file not found: {}", audio_path.display());
}
let out_path = out_path.unwrap_or_else(|| audio_path.with_extension("npy"));
match &encoder_bpk {
Some(p) => println!("Encoder : external BurnPack {}", p.display()),
None => println!("Encoder : embedded weights (wgpu → ndarray fallback)"),
}
println!("Audio : {}", audio_path.display());
println!("Output : {}", out_path.display());
println!();
println!("Initialising encoder…");
let encoder = match encoder_bpk {
Some(ref p) => neutts::NeuCodecEncoder::load(p)
.map_err(|e| anyhow::anyhow!("Failed to load encoder from {}: {e}", p.display()))?,
None => neutts::NeuCodecEncoder::new()
.map_err(|e| anyhow::anyhow!(
"{e}\n\n\
Run the one-time setup to embed the encoder:\n\
\n\
\tcargo run --example download_models\n\
\tcargo build\n"
))?,
};
println!(" → backend : {}", encoder.backend_name());
println!();
println!("Encoding {}…", audio_path.display());
let codes = encoder.encode_wav(&audio_path)?;
let duration_s = codes.len() as f32 / 50.0;
println!(
" → {} tokens ({:.2} s of audio at 50 tokens/s)",
codes.len(),
duration_s,
);
if duration_s < 3.0 {
eprintln!(
"WARNING: reference is only {duration_s:.1} s — \
5–30 s of clean speech gives the best cloning quality."
);
}
if let Some(parent) = out_path.parent() {
if !parent.as_os_str().is_empty() {
std::fs::create_dir_all(parent).ok();
}
}
neutts::npy::write_npy_i32(&out_path, &codes)?;
println!("Saved → {}", out_path.display());
println!();
println!(
"Use these codes for synthesis:\n\
\n\
\tcargo run --example basic --features espeak -- \\\n\
\t --text \"Your text here.\" \\\n\
\t --ref-codes {} \\\n\
\t --ref-text \"Transcript of the reference recording.\"",
out_path.display()
);
Ok(())
}
fn print_help() {
println!(
"encode_reference — encode a WAV to NeuCodec token IDs (.npy)\n\
\n\
The NeuCodec encoder is compiled into the binary — no external ONNX Runtime.\n\
Wgpu (GPU) is tried first; falls back to NdArray (CPU) automatically.\n\
\n\
SETUP (one-time):\n\
\tcargo run --example download_models && cargo build\n\
\n\
USAGE:\n\
\tcargo run --example encode_reference -- [OPTIONS]\n\
\n\
OPTIONS:\n\
\t--audio / -i PATH Input WAV file (required)\n\
\t--out / -o PATH Output .npy (default: same stem as audio)\n\
\t--encoder PATH External BurnPack (.bpk) weight file\n\
\t (default: use weights embedded in binary)\n\
\t--help / -h Show this help\n\
\n\
FORCE CPU (no wgpu):\n\
\tcargo run --example encode_reference --no-default-features -- --audio <WAV>"
);
}