Struct ModelFiles

Source

pub struct ModelFiles {
    pub config: Option<ModelAsset>,
    pub tokenizer: Option<ModelAsset>,
    pub weights: Vec<ModelAsset>,
    pub voices_dir: Option<ModelAssetDir>,
    pub speech_tokenizer_weights: Vec<ModelAsset>,
    pub speech_tokenizer_config: Option<ModelAsset>,
    pub generation_config: Option<ModelAsset>,
    pub preprocessor_config: Option<ModelAsset>,
}

Expand description

Resolved model assets for loading.

Each model type requires a specific set of files. You can provide them individually using the builder methods on TtsConfig, set TtsConfig::model_path to a directory that contains all of them, or rely on automatic HuggingFace Hub download (if the download feature is enabled).

§File resolution order (per file)

1. Explicit path — set via with_*_file() / with_*_dir() on TtsConfig. Use this when your project has its own download manager (e.g. flow-like hash-based local caching).
2. Auto-discovery — if model_path is set, the library looks for well-known filenames inside that directory.
3. HuggingFace Hub download — if the download feature is enabled and the file is still missing, it is fetched from the Hub. This is the convenient fallback for quick prototyping.

Fields§

§config: Option<ModelAsset>

Path to config.json — model architecture configuration.

Expected format: JSON object describing the neural-network hyperparameters (hidden size, number of layers, vocab size, …). This is the standard HuggingFace config.json format. Each backend stores its architecture metadata here, such as transformer dimensions, tokenizer sizes, sample rates, or auxiliary decoder configuration.

§tokenizer: Option<ModelAsset>

Path to tokenizer.json — BPE text tokenizer definition.

Expected format: HuggingFace Tokenizers self-contained JSON file. Contains the full vocabulary, merge rules, special tokens, and pre/post-processing steps. No separate vocab.json or merges.txt required when this file is present.

Used by the models that rely on this tokenizer to convert input text into token IDs before feeding them to the transformer backbone.

§weights: Vec<ModelAsset>

Paths to model weight files (usually .safetensors; see "Other formats" below).

Expected format: One or more SafeTensors files containing the neural-network parameters.

Single file — model.safetensors (for models < ~5 GB).
Sharded — model-00001-of-00004.safetensors, … When sharded, the library also expects model.safetensors.index.json in the same directory (auto-discovered or downloaded).
Other formats — some backends use consolidated.safetensors or .pth files instead of the standard filename.

§voices_dir: Option<ModelAssetDir>

Path to a voice asset directory for backends that ship preset voices.

Supported layouts include:

voices/                ← Kokoro preset voices (`*.pt`)
voice_embedding/       ← Voxtral preset voices (`*.pt`)

The exact file format depends on the backend.

§speech_tokenizer_weights: Vec<ModelAsset>

Paths to the speech/audio tokenizer decoder weight files.

Expected format: SafeTensors files for the auxiliary decoder used by models that emit discrete audio codec tokens.

Contains:

Residual VQ codebooks (16 groups × 2048 codes × dim)
Pre-conv + pre-transformer layers
Upsampling layers (transposed convolutions + SnakeBeta)
Final decoder convolution
Qwen3-TTS uses the separate Qwen/Qwen3-TTS-Tokenizer-12Hz repository.
OmniVoice uses the audio_tokenizer/ subdirectory inside the main model snapshot.

§speech_tokenizer_config: Option<ModelAsset>

Path to config.json of the speech/audio tokenizer.

Expected format: JSON config for the speech tokenizer decoder model, including codebook dimensions, upsampling ratios, and activation parameters.

If not provided, it is auto-discovered from a nested audio_tokenizer/ directory or downloaded from HuggingFace.

§generation_config: Option<ModelAsset>

Path to generation_config.json (optional).

Expected format: Standard HuggingFace generation configuration with fields like max_new_tokens, top_p, temperature, do_sample, repetition_penalty, etc.

If not provided, sensible per-model defaults are used.

§preprocessor_config: Option<ModelAsset>

Path to preprocessor_config.json (optional).

Used by backends such as VibeVoice that publish prompt-building and audio-normalization defaults separately from config.json.

Struct ModelFiles Copy item path

§File resolution order (per file)

Fields§

Implementations§

impl ModelFiles

pub fn fill_from_directory(&mut self, dir: &Path)

pub fn fill_from_asset_bundle(&mut self, bundle: &ModelAssetBundle)

pub fn load_safetensors_vb( assets: &[ModelAsset], dtype: DType, device: &Device, ) -> Result<VarBuilder<'static>, TtsError>

pub fn fill_from_hf( &mut self, model_id: &str, model_type: ModelType, bearer_token: Option<&str>, ) -> Result<(), TtsError>

pub fn validate(&self, model_type: ModelType) -> Result<(), TtsError>

pub fn missing_files(&self, model_type: ModelType) -> Vec<&'static str>

Trait Implementations§

impl Clone for ModelFiles

fn clone(&self) -> ModelFiles

fn clone_from(&mut self, source: &Self)

impl Debug for ModelFiles

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for ModelFiles

fn default() -> ModelFiles

Auto Trait Implementations§

impl Freeze for ModelFiles

impl RefUnwindSafe for ModelFiles

impl Send for ModelFiles

impl Sync for ModelFiles

impl Unpin for ModelFiles

impl UnsafeUnpin for ModelFiles

impl UnwindSafe for ModelFiles

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> ErasedDestructor for T where T: 'static,

Struct ModelFiles

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> ErasedDestructor for T
where T: 'static,