1use rlx_runtime::Device;
19
20#[derive(Debug, Clone, Copy)]
22pub struct VoxtralTtsBenchReport {
23 pub device: Device,
24 pub eager_lm: bool,
25 pub eager_acoustic: bool,
26 pub embed_ms: f64,
28 pub lm_prefill_ms: f64,
30 pub lm_decode_ms: f64,
32 pub acoustic_ms: f64,
34 pub codec_ms: f64,
36 pub synthesis_ms: f64,
38 pub audio_frames: usize,
39 pub prompt_tokens: usize,
40 pub pcm_samples: usize,
41 pub euler_steps_per_frame: usize,
43 pub acoustic_velocity_runs: u64,
45}
46
47impl VoxtralTtsBenchReport {
48 pub fn sample_rate_hz() -> u32 {
49 24_000
50 }
51
52 pub fn audio_duration_ms(&self) -> f64 {
53 if self.pcm_samples == 0 {
54 return 0.0;
55 }
56 self.pcm_samples as f64 / Self::sample_rate_hz() as f64 * 1000.0
57 }
58
59 pub fn rtf(&self) -> f64 {
60 let dur = self.audio_duration_ms();
61 if dur <= 0.0 {
62 return 0.0;
63 }
64 self.synthesis_ms / dur
65 }
66
67 pub fn lm_total_ms(&self) -> f64 {
68 self.lm_prefill_ms + self.lm_decode_ms
69 }
70
71 pub fn stage_share(&self, ms: f64) -> f64 {
72 let total = self.synthesis_ms;
73 if total <= 0.0 {
74 0.0
75 } else {
76 100.0 * ms / total
77 }
78 }
79
80 pub fn label(&self) -> String {
81 format!(
82 "lm={} acoustic={}",
83 if self.eager_lm { "eager" } else { "compiled" },
84 if self.eager_acoustic {
85 "eager"
86 } else {
87 "compiled"
88 }
89 )
90 }
91
92 pub fn print_line(&self) {
93 let dur = self.audio_duration_ms();
94 let rtf = self.rtf();
95 let lm = self.lm_total_ms();
96 println!(
97 "config={} device={:?} frames={} euler={} rtf={:.3} synthesis_ms={:.2} \
98 lm_prefill_ms={:.2} lm_decode_ms={:.2} acoustic_ms={:.2} codec_ms={:.2} \
99 lm_share={:.0}% acoustic_share={:.0}% codec_share={:.0}% \
100 lm_ms_per_frame={:.3} acoustic_ms_per_frame={:.3} velocity_runs={}",
101 self.label(),
102 self.device,
103 self.audio_frames,
104 self.euler_steps_per_frame,
105 rtf,
106 self.synthesis_ms,
107 self.lm_prefill_ms,
108 self.lm_decode_ms,
109 self.acoustic_ms,
110 self.codec_ms,
111 self.stage_share(lm),
112 self.stage_share(self.acoustic_ms),
113 self.stage_share(self.codec_ms),
114 if self.audio_frames > 0 {
115 lm / self.audio_frames as f64
116 } else {
117 0.0
118 },
119 if self.audio_frames > 0 {
120 self.acoustic_ms / self.audio_frames as f64
121 } else {
122 0.0
123 },
124 self.acoustic_velocity_runs,
125 );
126 println!(
127 " audio_ms={:.2} prompt_tokens={} pcm_samples={} embed_ms={:.2}",
128 dur, self.prompt_tokens, self.pcm_samples, self.embed_ms,
129 );
130 }
131}