pub struct TtsModelExecutor { /* private fields */ }
Expand description
Qwen3-TTS executor: text-to-speech synthesis.
Implementations§
Source§impl TtsModelExecutor
impl TtsModelExecutor
Sourcepub fn from_path(
model_path: &str,
device: CandleDevice,
dtype: DType,
) -> Result<Self>
pub fn from_path( model_path: &str, device: CandleDevice, dtype: DType, ) -> Result<Self>
Load from model directory containing:
- config.json (TalkerConfig)
- model.safetensors (Talker weights)
- speech_tokenizer/model.safetensors (Vocoder weights)
- tokenizer_config.json + vocab.json + merges.txt (text tokenizer)
Sourcepub fn synthesize(&mut self, text: &str, language: &str) -> Result<Vec<f32>>
pub fn synthesize(&mut self, text: &str, language: &str) -> Result<Vec<f32>>
Synthesize speech from text.
Returns PCM samples at 24kHz as Vec<f32>.
Prompt structure (matches Python/qwen3-tts-rs):
Prefill: [role_prefix(3)] + [tts_text_prefix(6) + codec_prefix(6)] + [first_text + codec_bos]
Trailing: text_projection(remaining_text + tts_eos) — added per decode step
Sourcepub fn synthesize_streaming<F: FnMut(usize, &[f32])>(
&mut self,
text: &str,
language: &str,
chunk_frames: usize,
on_chunk: F,
) -> Result<Vec<Vec<f32>>>
pub fn synthesize_streaming<F: FnMut(usize, &[f32])>( &mut self, text: &str, language: &str, chunk_frames: usize, on_chunk: F, ) -> Result<Vec<Vec<f32>>>
Streaming TTS: calls on_chunk with each audio chunk as soon as it’s ready.
Each chunk is chunk_frames codec frames decoded to audio (~800ms at default 10 frames).
First chunk arrives after chunk_frames decode steps (~2-3s for 0.6B).
Sourcepub fn sample_rate(&self) -> usize
pub fn sample_rate(&self) -> usize
Get the output sample rate.
pub fn config(&self) -> &TalkerConfig
Sourcepub fn synthesize_voice_clone(
&mut self,
text: &str,
language: &str,
ref_audio_path: &str,
ref_text: &str,
) -> Result<Vec<f32>>
pub fn synthesize_voice_clone( &mut self, text: &str, language: &str, ref_audio_path: &str, ref_text: &str, ) -> Result<Vec<f32>>
Synthesize speech with voice cloning from a reference audio.
Uses ICL (in-context learning) prompting: the reference audio is encoded to codec tokens and prepended to the generation prompt, along with a speaker embedding extracted via ECAPA-TDNN.
Returns PCM samples at 24kHz as Vec<f32>.
Trait Implementations§
Source§impl ModelExecutor for TtsModelExecutor
impl ModelExecutor for TtsModelExecutor
Source§fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn prefill<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 PrefillInput,
) -> Pin<Box<dyn Future<Output = Result<PrefillOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Source§fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn decode<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 DecodeInput,
) -> Pin<Box<dyn Future<Output = Result<DecodeOutput>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Source§fn capabilities(&self) -> ExecutorCapabilities
fn capabilities(&self) -> ExecutorCapabilities
Source§fn release_cache(&self, _: &str)
fn release_cache(&self, _: &str)
Source§fn status(&self) -> ExecutorStatus
fn status(&self) -> ExecutorStatus
Source§fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn batch_decode<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Source§fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: &'life1 Arc<dyn TensorLike>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn TensorLike>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Source§fn truncate_kv<'life0, 'life1, 'async_trait>(
&'life0 self,
_kv_cache: &'life1 Arc<dyn KvCacheHandle>,
_new_len: usize,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn truncate_kv<'life0, 'life1, 'async_trait>(
&'life0 self,
_kv_cache: &'life1 Arc<dyn KvCacheHandle>,
_new_len: usize,
) -> Pin<Box<dyn Future<Output = Result<(), FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Truncates the given KV cache to new_len.
Used by speculative decoding on partial rejection so the next
iteration sees a KV prefix that matches the accepted token stream.
Default: Ok(()) — executors that don’t cache per-sequence state
(stub, mock) are inherently tolerant; real LLM executors override.
Source§fn forward_verify<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn forward_verify<'life0, 'life1, 'async_trait>(
&'life0 self,
inputs: &'life1 [DecodeInput],
) -> Pin<Box<dyn Future<Output = Result<Vec<DecodeOutput>, FerrumError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Runs a single forward pass over N+1 tokens,
producing one logits row per position. Used by speculative
decoding’s target path so we don’t pay N+1 sequential forwards. Read more
Auto Trait Implementations§
impl !Freeze for TtsModelExecutor
impl !RefUnwindSafe for TtsModelExecutor
impl Send for TtsModelExecutor
impl Sync for TtsModelExecutor
impl Unpin for TtsModelExecutor
impl UnsafeUnpin for TtsModelExecutor
impl !UnwindSafe for TtsModelExecutor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more