encode_reference/
encode_reference.rs1use std::path::PathBuf;
61
62fn main() -> anyhow::Result<()> {
63 let mut args = std::env::args().skip(1).peekable();
65
66 let mut encoder_bpk: Option<PathBuf> = None; let mut audio_path: Option<PathBuf> = None;
68 let mut out_path: Option<PathBuf> = None;
69
70 while let Some(arg) = args.next() {
71 match arg.as_str() {
72 "--encoder" => { if let Some(v) = args.next() { encoder_bpk = Some(PathBuf::from(v)); } }
73 "--audio" | "-i" => { if let Some(v) = args.next() { audio_path = Some(PathBuf::from(v)); } }
74 "--out" | "-o" => { if let Some(v) = args.next() { out_path = Some(PathBuf::from(v)); } }
75 "--help" | "-h" => { print_help(); return Ok(()); }
76 other => {
77 eprintln!("Unknown argument: {other} (use --help for usage)");
78 std::process::exit(1);
79 }
80 }
81 }
82
83 let audio_path = audio_path.ok_or_else(|| {
85 anyhow::anyhow!("No audio file specified. Use --audio <path.wav> (--help for more)")
86 })?;
87
88 if !audio_path.exists() {
89 anyhow::bail!("Audio file not found: {}", audio_path.display());
90 }
91
92 let out_path = out_path.unwrap_or_else(|| audio_path.with_extension("npy"));
93
94 match &encoder_bpk {
96 Some(p) => println!("Encoder : external BurnPack {}", p.display()),
97 None => println!("Encoder : embedded weights (wgpu → ndarray fallback)"),
98 }
99 println!("Audio : {}", audio_path.display());
100 println!("Output : {}", out_path.display());
101 println!();
102
103 println!("Initialising encoder…");
105 let encoder = match encoder_bpk {
106 Some(ref p) => neutts::NeuCodecEncoder::load(p)
107 .map_err(|e| anyhow::anyhow!("Failed to load encoder from {}: {e}", p.display()))?,
108 None => neutts::NeuCodecEncoder::new()
109 .map_err(|e| anyhow::anyhow!(
110 "{e}\n\n\
111 Run the one-time setup to embed the encoder:\n\
112 \n\
113 \tcargo run --example download_models\n\
114 \tcargo build\n"
115 ))?,
116 };
117 println!(" → backend : {}", encoder.backend_name());
118 println!();
119
120 println!("Encoding {}…", audio_path.display());
122 let codes = encoder.encode_wav(&audio_path)?;
123
124 let duration_s = codes.len() as f32 / 50.0;
125 println!(
126 " → {} tokens ({:.2} s of audio at 50 tokens/s)",
127 codes.len(),
128 duration_s,
129 );
130
131 if duration_s < 3.0 {
132 eprintln!(
133 "WARNING: reference is only {duration_s:.1} s — \
134 5–30 s of clean speech gives the best cloning quality."
135 );
136 }
137
138 if let Some(parent) = out_path.parent() {
140 if !parent.as_os_str().is_empty() {
141 std::fs::create_dir_all(parent).ok();
142 }
143 }
144
145 neutts::npy::write_npy_i32(&out_path, &codes)?;
146 println!("Saved → {}", out_path.display());
147
148 println!();
149 println!(
150 "Use these codes for synthesis:\n\
151 \n\
152 \tcargo run --example basic --features espeak -- \\\n\
153 \t --text \"Your text here.\" \\\n\
154 \t --ref-codes {} \\\n\
155 \t --ref-text \"Transcript of the reference recording.\"",
156 out_path.display()
157 );
158
159 Ok(())
160}
161
/// Prints the usage text for the `encode_reference` example to stdout.
fn print_help() {
    // One literal per output line, joined at compile time. The last piece has
    // no trailing newline because `println!` supplies it.
    const HELP_TEXT: &str = concat!(
        "encode_reference — encode a WAV to NeuCodec token IDs (.npy)\n",
        "\n",
        "The NeuCodec encoder is compiled into the binary — no external ONNX Runtime.\n",
        "Wgpu (GPU) is tried first; falls back to NdArray (CPU) automatically.\n",
        "\n",
        "SETUP (one-time):\n",
        "\tcargo run --example download_models && cargo build\n",
        "\n",
        "USAGE:\n",
        "\tcargo run --example encode_reference -- [OPTIONS]\n",
        "\n",
        "OPTIONS:\n",
        "\t--audio / -i PATH Input WAV file (required)\n",
        "\t--out / -o PATH Output .npy (default: same stem as audio)\n",
        "\t--encoder PATH External BurnPack (.bpk) weight file\n",
        "\t (default: use weights embedded in binary)\n",
        "\t--help / -h Show this help\n",
        "\n",
        "FORCE CPU (no wgpu):\n",
        "\tcargo run --example encode_reference --no-default-features -- --audio <WAV>",
    );

    println!("{}", HELP_TEXT);
}