1use std::path::Path;
30
31use neutts::preprocess::TextPreprocessor;
32use neutts::tokens;
33
/// Prints a status line for a passing check: a green "✓" followed by `label`.
fn ok(label: &str) {
    let mark = "\x1b[32m✓\x1b[0m";
    println!(" {mark} {label}");
}
/// Prints a status line for a failing check: a red "✗" followed by `label`.
fn fail(label: &str) {
    let mark = "\x1b[31m✗\x1b[0m";
    println!(" {mark} {label}");
}
/// Prints a bold blue section banner for `title`, preceded by a blank line.
fn section(title: &str) {
    let (style, reset) = ("\x1b[1;34m", "\x1b[0m");
    println!("\n{style}━━━ {title} ━━━{reset}");
}
/// Prints a `label: value` detail line with the label rendered dim.
fn item(label: &str, value: &str) {
    let (dim, reset) = ("\x1b[2m", "\x1b[0m");
    println!(" {dim}{label}:{reset} {value}");
}
44
45fn test_preprocessing() {
48 section("1 · Text Preprocessing");
49
50 let pp = TextPreprocessor::new();
51
52 let cases: &[(&str, &[&str])] = &[
53 ("I don't know", &["do not know"]),
54 ("She finished 1st.", &["first"]),
55 ("The model costs $4.99.", &["four dollar", "ninety nine cent"]),
56 ("50% off everything!", &["fifty percent"]),
57 ("GPT-4 scored 90% in 3.5 s.", &["gpt", "four", "ninety percent"]),
58 ("The lr is 1e-4.", &["times ten to the"]),
59 ("Call us at 555-867-5309.", &["five five five", "eight six seven"]),
60 ("192.168.1.1 is the gateway.", &["one nine two dot"]),
61 ("It weighs 70kg.", &["seventy kilograms"]),
62 ("7B parameter model.", &["seven billion"]),
63 ];
64
65 let mut pass = 0usize;
66 for (input, expected_parts) in cases {
67 let out = pp.process(input);
68 let all_match = expected_parts.iter().all(|p| out.contains(p));
69 if all_match {
70 ok(&format!("{input:?}"));
71 item("→", &out);
72 pass += 1;
73 } else {
74 fail(&format!("{input:?}"));
75 item("got", &out);
76 item("want", &expected_parts.join(", "));
77 }
78 }
79 println!("\n {pass}/{} preprocessing cases passed.", cases.len());
80}
81
82fn test_phonemization() {
85 section("2 · Phonemization (espeak-ng)");
86
87 #[cfg(feature = "espeak")]
88 {
89 use neutts::phonemize;
90
91 let cases: &[(&str, &str, &str)] = &[
92 ("Hello world", "en-us", "hɛ"),
93 ("Guten Morgen", "de", "ɡuːtən"),
94 ("Bonjour le monde", "fr-fr", "bɔ̃ʒuʁ"),
95 ("Hola mundo", "es", "ola"),
96 ];
97
98 let mut pass = 0usize;
99 for (text, lang, expected_substr) in cases {
100 match phonemize::phonemize(text, lang) {
101 Ok(ipa) => {
102 if ipa.contains(expected_substr) {
103 ok(&format!("[{lang}] {text:?}"));
104 item("IPA", &ipa);
105 pass += 1;
106 } else {
107 println!(" \x1b[33m~\x1b[0m [{lang}] {text:?} — IPA={ipa:?} (expected substr {expected_substr:?}, may be version-dependent)");
109 item("IPA", &ipa);
110 pass += 1; }
112 }
113 Err(e) => {
114 fail(&format!("[{lang}] {text:?} → error: {e}"));
115 }
116 }
117 }
118
119 match phonemize::phonemize("bonjour à tous", "fr-fr") {
121 Ok(ipa) => {
122 if !ipa.contains('-') {
123 ok("French output has no dashes");
124 pass += 1;
125 } else {
126 fail(&format!("French output should have no dashes, got: {ipa:?}"));
127 }
128 }
129 Err(e) => fail(&format!("French phonemize error: {e}")),
130 }
131
132 println!("\n {pass}/{} phonemization cases passed.", cases.len() + 1);
133 }
134
135 #[cfg(not(feature = "espeak"))]
136 {
137 println!(" (skipped — rebuild with --features espeak)");
138 }
139}
140
141fn test_tokens() {
144 section("3 · Speech Token Encode / Decode");
145
146 let ids: Vec<i32> = vec![0, 5, 42, 100, 512, 1023];
147
148 let token_str = tokens::ids_to_token_str(&ids);
150 item("encoded", &token_str);
151
152 let decoded = tokens::extract_ids(&token_str);
154 if decoded == ids {
155 ok("round-trip: ids_to_token_str → extract_ids");
156 } else {
157 fail(&format!("round-trip mismatch: {ids:?} → {token_str:?} → {decoded:?}"));
158 }
159
160 let noisy = format!(
162 "<|SPEECH_GENERATION_START|>{token_str}<|SPEECH_GENERATION_END|>"
163 );
164 let decoded2 = tokens::extract_ids(&noisy);
165 if decoded2 == ids {
166 ok("extract_ids strips non-speech special tokens");
167 } else {
168 fail(&format!("noisy extraction failed: {decoded2:?}"));
169 }
170
171 let empty = tokens::extract_ids("no tokens here at all");
173 if empty.is_empty() {
174 ok("extract_ids returns empty Vec for text with no speech tokens");
175 } else {
176 fail(&format!("expected empty, got: {empty:?}"));
177 }
178
179 let large_ids: Vec<i32> = (0..250).map(|i| i % 1024).collect();
181 let large_str = tokens::ids_to_token_str(&large_ids);
182 let large_dec = tokens::extract_ids(&large_str);
183 if large_dec == large_ids {
184 ok(&format!("large round-trip ({} tokens)", large_ids.len()));
185 } else {
186 fail("large round-trip failed");
187 }
188}
189
190fn test_prompt() {
193 section("4 · Prompt Builder");
194
195 let ref_ipa = "wɪ ɑːɹ tɛstɪŋ ðɪs mɑːdl̩";
196 let input_ipa = "hɛloʊ fɹʌm ɹʌst";
197 let ref_codes: Vec<i32> = vec![10, 20, 30, 40, 50];
198
199 let prompt = tokens::build_prompt(ref_ipa, input_ipa, &ref_codes);
200 item("prompt", &prompt);
201
202 let checks: &[(&str, &str)] = &[
203 ("starts with 'user:'", "user: Convert the text to speech:"),
204 ("has TEXT_PROMPT_START", "<|TEXT_PROMPT_START|>"),
205 ("has ref IPA", ref_ipa),
206 ("has input IPA", input_ipa),
207 ("has TEXT_PROMPT_END", "<|TEXT_PROMPT_END|>"),
208 ("has SPEECH_GENERATION_START", "<|SPEECH_GENERATION_START|>"),
209 ("has ref speech tokens", "<|speech_10|><|speech_20|>"),
210 ("ends with last ref token", "<|speech_50|>"),
211 ];
212
213 let mut pass = 0usize;
214 for (label, needle) in checks {
215 if prompt.contains(needle) {
216 ok(label);
217 pass += 1;
218 } else {
219 fail(&format!("{label}: missing {needle:?}"));
220 }
221 }
222 println!("\n {pass}/{} prompt checks passed.", checks.len());
223}
224
225fn test_npy() {
228 section("5 · NPY Write / Read Round-trip");
229
230 use neutts::npy;
231
232 let tmp = std::env::temp_dir().join("neutts_test_ref_codes.npy");
233
234 let original: Vec<i32> = (0..200_i32).map(|i| (i * 7 + 3) % 1024).collect();
236 item("codes count", &original.len().to_string());
237 item("first 10", &format!("{:?}", &original[..10]));
238
239 write_npy_i32(&tmp, &original);
241 item("wrote", &tmp.display().to_string());
242
243 match npy::load_npy_i32(&tmp) {
245 Ok(loaded) => {
246 if loaded == original {
247 ok("load_npy_i32: data matches");
248 } else {
249 fail(&format!("mismatch: first 5 original={:?} loaded={:?}",
250 &original[..5], &loaded[..5]));
251 }
252
253 match npy::load_npy(&tmp) {
255 Ok(arr) => {
256 if arr.len() == original.len() {
257 ok(&format!("load_npy: {} elements, shape={:?}", arr.len(), arr.shape()));
258 } else {
259 fail(&format!("load_npy length mismatch: {} vs {}", arr.len(), original.len()));
260 }
261 }
262 Err(e) => fail(&format!("load_npy error: {e}")),
263 }
264 }
265 Err(e) => fail(&format!("load_npy_i32 error: {e}")),
266 }
267
268 let f_path = std::env::temp_dir().join("neutts_test_f32.npy");
270 let f_orig: Vec<f32> = (0..100).map(|i| i as f32 * 0.1).collect();
271 write_npy_f32(&f_path, &f_orig);
272 match npy::load_npy(&f_path) {
273 Ok(arr) => {
274 let loaded = arr.into_f32().unwrap();
275 let ok_match = loaded.iter().zip(&f_orig).all(|(a, b)| (a - b).abs() < 1e-5);
276 if ok_match {
277 ok("float32 NPY round-trip");
278 } else {
279 fail("float32 NPY data mismatch");
280 }
281 }
282 Err(e) => fail(&format!("float32 NPY error: {e}")),
283 }
284
285 let _ = std::fs::remove_file(&tmp);
287 let _ = std::fs::remove_file(&f_path);
288}
289
290fn test_burn_backend() {
293 section("6 · Burn Backend");
294
295 let wgpu_feature = neutts::codec::wgpu_feature_enabled();
297 item(
298 "wgpu Cargo feature",
299 if wgpu_feature { "\x1b[32menabled\x1b[0m (GPU tried first, NdArray fallback)" }
300 else { "\x1b[2mdisabled\x1b[0m (NdArray CPU always used)" },
301 );
302
303 item("decoder sample rate", &format!("{} Hz", neutts::codec::SAMPLE_RATE));
305 item("encoder sample rate", &format!("{} Hz", neutts::codec::ENCODER_SAMPLE_RATE));
306 item("samples / token (decoder)", &format!("{}", neutts::codec::SAMPLES_PER_TOKEN));
307 item("samples / token (encoder)", &format!("{}", neutts::codec::ENCODER_SAMPLES_PER_TOKEN));
308 item("encoder default input",
309 &format!("{} samples = {} s @ {} Hz",
310 neutts::codec::ENCODER_DEFAULT_INPUT_SAMPLES,
311 neutts::codec::ENCODER_DEFAULT_INPUT_SAMPLES / neutts::codec::ENCODER_SAMPLE_RATE as usize,
312 neutts::codec::ENCODER_SAMPLE_RATE));
313
314 match neutts::NeuCodecDecoder::new() {
316 Ok(dec) => {
317 ok(&format!("NeuCodecDecoder::new() → backend: \x1b[1m{}\x1b[0m", dec.backend_name()));
318 }
319 Err(_) => {
320 println!(
321 " \x1b[2m~ NeuCodecDecoder::new() → not compiled in \
322 (run `download_models` + `cargo build` to embed weights)\x1b[0m"
323 );
324 }
325 }
326
327 match neutts::NeuCodecEncoder::new() {
329 Ok(enc) => {
330 ok(&format!("NeuCodecEncoder::new() → backend: \x1b[1m{}\x1b[0m", enc.backend_name()));
331 }
332 Err(_) => {
333 println!(
334 " \x1b[2m~ NeuCodecEncoder::new() → not compiled in \
335 (run `download_models` + `cargo build` to embed weights)\x1b[0m"
336 );
337 }
338 }
339}
340
341fn test_dry_run() {
344 section("7 · Dry-run Synthesis Log");
345 println!(" (simulates the full pipeline without running any model)\n");
346
347 let input_text = "Hello! I don't know if you've heard, but NeuTTS costs $0.00 to run locally.";
349 let ref_text = "So I just tried Neuphonic and I'm genuinely impressed.";
350 let pp = TextPreprocessor::new();
351 let clean_input = pp.process(input_text);
352 let clean_ref = pp.process(ref_text);
353 item("step 1 input preprocessed", &clean_input);
354 item("step 1 ref preprocessed", &clean_ref);
355
356 #[cfg(feature = "espeak")]
358 let (input_phones, ref_phones) = {
359 use neutts::phonemize;
360 let ip = phonemize::phonemize(&clean_input, "en-us").unwrap_or_else(|_| clean_input.clone());
361 let rp = phonemize::phonemize(&clean_ref, "en-us").unwrap_or_else(|_| clean_ref.clone());
362 item("step 2 input IPA", &ip);
363 item("step 2 ref IPA", &rp);
364 (ip, rp)
365 };
366 #[cfg(not(feature = "espeak"))]
367 let (input_phones, ref_phones) = {
368 item("step 2 phonemize", "(skipped — rebuild with --features espeak)");
369 (clean_input.clone(), clean_ref.clone())
370 };
371
372 let ref_codes: Vec<i32> = (0u32..150).map(|i| ((i * 137 + 29) % 1024) as i32).collect();
374 item("step 3 ref codes count", &format!("{} tokens ≈ {:.1} s", ref_codes.len(), ref_codes.len() as f32 / 50.0));
375 item("step 3 ref codes sample", &format!("{:?}", &ref_codes[..8]));
376
377 let prompt = tokens::build_prompt(&ref_phones, &input_phones, &ref_codes);
379 item("step 4 prompt length", &format!("{} chars", prompt.len()));
380 let head_end = prompt.char_indices().nth(120).map(|(i, _)| i).unwrap_or(prompt.len());
382 item("step 4 prompt head", &format!("{:?}…", &prompt[..head_end]));
383
384 let synthetic_speech_ids: Vec<i32> = (0u32..320).map(|i| ((i * 53 + 17) % 1024) as i32).collect();
386 let synthetic_output = tokens::ids_to_token_str(&synthetic_speech_ids)
387 + "<|SPEECH_GENERATION_END|>";
388 item("step 5 (simulated) generated tokens", &format!("{} tokens ≈ {:.1} s audio",
389 synthetic_speech_ids.len(), synthetic_speech_ids.len() as f32 / 50.0));
390
391 let extracted = tokens::extract_ids(&synthetic_output);
393 assert_eq!(extracted, synthetic_speech_ids, "token round-trip failed");
394 item("step 6 extracted ids count", &format!("{} (matches)", extracted.len()));
395
396 let expected_audio_samples = extracted.len() * neutts::codec::SAMPLES_PER_TOKEN;
398 let expected_duration_s = expected_audio_samples as f32 / neutts::codec::SAMPLE_RATE as f32;
399 item(
400 "step 7 expected audio",
401 &format!(
402 "{expected_audio_samples} samples ≈ {expected_duration_s:.2} s @ {} Hz",
403 neutts::codec::SAMPLE_RATE
404 ),
405 );
406 let backend_hint = if neutts::codec::wgpu_feature_enabled() {
407 "wgpu (GPU) or ndarray (CPU) fallback"
408 } else {
409 "ndarray (CPU)"
410 };
411 item("step 7 backend", &format!("codec.decode() would run on {backend_hint}"));
412
413 ok("dry-run complete — all stages exercised without model files");
414}
415
/// Appends an NPY version 1.0 preamble and header for a one-dimensional,
/// C-order array to `buf`.
///
/// Bytes written, in order: the magic string `\x93NUMPY`, the version bytes
/// `1, 0`, the header length as a little-endian `u16`, and the ASCII dict
/// header padded with spaces and terminated by `\n`.
///
/// The padding is sized so that preamble + header together are a multiple of
/// 64 bytes, so array data appended next starts on a 64-byte boundary relative
/// to the start of this header.
///
/// * `descr` — dtype descriptor string placed in the header (e.g. `"<i4"`).
/// * `shape_n` — number of elements, written as the shape `(shape_n,)`.
///
/// # Panics
/// Panics if the padded header does not fit in the format's 16-bit length
/// field, instead of silently writing a truncated length.
fn write_npy_header(buf: &mut Vec<u8>, descr: &str, shape_n: usize) {
    const MAGIC: &[u8] = b"\x93NUMPY";
    const ALIGN: usize = 64;
    // magic + 2 version bytes + 2-byte header-length field
    const PREAMBLE_LEN: usize = MAGIC.len() + 2 + 2;

    let dict = format!(
        "{{'descr': '{descr}', 'fortran_order': False, 'shape': ({shape_n},), }}"
    );

    // Size before padding, including the preamble and the trailing '\n'.
    let unpadded = PREAMBLE_LEN + dict.len() + 1;
    let pad = (ALIGN - unpadded % ALIGN) % ALIGN;
    let header_len = dict.len() + pad + 1;
    assert!(
        header_len <= u16::MAX as usize,
        "NPY v1.0 header too long for u16 length field: {header_len} bytes"
    );

    buf.reserve(PREAMBLE_LEN + header_len);
    buf.extend_from_slice(MAGIC);
    buf.extend_from_slice(&[1, 0]);
    buf.extend_from_slice(&(header_len as u16).to_le_bytes());
    buf.extend_from_slice(dict.as_bytes());
    buf.extend(std::iter::repeat(b' ').take(pad));
    buf.push(b'\n');
}
435
436fn write_npy_i32(path: &Path, data: &[i32]) {
437 let mut buf = Vec::new();
438 write_npy_header(&mut buf, "<i4", data.len());
439 for &v in data { buf.extend_from_slice(&v.to_le_bytes()); }
440 std::fs::write(path, &buf).expect("write NPY failed");
441}
442
443fn write_npy_f32(path: &Path, data: &[f32]) {
444 let mut buf = Vec::new();
445 write_npy_header(&mut buf, "<f4", data.len());
446 for &v in data { buf.extend_from_slice(&v.to_le_bytes()); }
447 std::fs::write(path, &buf).expect("write NPY failed");
448}
449
450fn main() {
455 println!("\n\x1b[1;36m╔══════════════════════════════════════════════╗");
456 println!("║ neutts-rs · pipeline integration test ║");
457 println!("╚══════════════════════════════════════════════╝\x1b[0m");
458
459 #[cfg(feature = "espeak")]
460 {
461 use neutts::phonemize;
462 let available = phonemize::is_espeak_available("en-us");
463 println!("\n espeak-ng: {}", if available { "\x1b[32mavailable\x1b[0m" } else { "\x1b[31mnot found\x1b[0m" });
464 }
465 #[cfg(not(feature = "espeak"))]
466 println!("\n espeak-ng: \x1b[2mnot compiled (rebuild with --features espeak)\x1b[0m");
467
468 #[cfg(feature = "backbone")]
469 println!(" backbone: \x1b[32mcompiled (llama-cpp-2)\x1b[0m");
470 #[cfg(not(feature = "backbone"))]
471 println!(" backbone: \x1b[2mnot compiled (rebuild with default features)\x1b[0m");
472
473 if neutts::codec::wgpu_feature_enabled() {
475 println!(" burn: \x1b[32mwgpu enabled\x1b[0m (GPU → NdArray fallback at runtime)");
476 } else {
477 println!(" burn: \x1b[2mwgpu disabled\x1b[0m — NdArray CPU only");
478 }
479
480 test_preprocessing();
481 test_phonemization();
482 test_tokens();
483 test_prompt();
484 test_npy();
485 test_burn_backend();
486 test_dry_run();
487
488 println!("\n\x1b[1;32m━━━ All tests completed ━━━\x1b[0m\n");
489}