Skip to main content

voxcpm_rs/
lib.rs

1//! # voxcpm-rs
2//!
3//! Pure-Rust inference for [VoxCPM2](https://huggingface.co/openbmb/VoxCPM2) built on top
4//! of the [Burn](https://burn.dev) ML framework. Supports Vulkan (via `wgpu`) and a CPU
5//! fallback through `ndarray`.
6//!
7//! ## Quick start
8//!
9//! ```no_run
10//! # #[cfg(feature = "cpu")] {
11//! use voxcpm_rs::{GenerateOptions, Prompt, PromptAudio, VoxCPM};
12//!
13//! type B = burn::backend::NdArray<f32>;
14//! let device = Default::default();
15//! let model: VoxCPM<B> = VoxCPM::from_local("./pretrained_models/VoxCPM2", &device).unwrap();
16//!
17//! // Zero-shot:
18//! let wav = model.generate("Hello, world!", GenerateOptions::default()).unwrap();
19//!
20//! // Voice cloning from a reference wav:
21//! let opts = GenerateOptions::builder()
22//!     .timesteps(10)
23//!     .prompt(Prompt::Reference { audio: "speaker.wav".into() })
24//!     .build();
25//! let wav = model.generate("Hello, world!", opts).unwrap();
26//!
27//! voxcpm_rs::audio::write_wav("out.wav", &wav, model.sample_rate()).unwrap();
28//! # }
29//! ```
30//!
31//! See the [`VoxCPM`] struct for the convenience API, or the individual submodules
32//! ([`minicpm4`], [`locdit`], [`locenc`], [`audiovae`]) for low-level access.
33
34// Bumped from the default 128 because enabling burn's `fusion` + `autotune`
35// features pushes the wgpu-core / naga generic chain past the limit.
36#![recursion_limit = "256"]
37#![warn(missing_debug_implementations)] // warn when a public type lacks a `Debug` impl
38
39pub mod audio; // exposes `write_wav`, used by the quick-start example
40pub mod audiovae; // defines `AudioVae` (re-exported at the crate root)
41pub mod config; // `*Config` types, re-exported at the crate root
42pub mod error; // crate-wide `Error` and `Result`
43pub mod fsq; // NOTE(review): presumably finite scalar quantization — confirm
44pub mod locdit; // low-level access; NOTE(review): presumably the local DiT — confirm
45pub mod locenc; // low-level access; NOTE(review): presumably the local encoder — confirm
46pub mod minicpm4; // low-level access; NOTE(review): presumably the MiniCPM4 LM — confirm
47pub mod tokenizer; // NOTE(review): presumably text tokenization — confirm
48pub mod voxcpm2; // convenience API: `VoxCPM`, `GenerateOptions`, `Prompt`, `GenerateStream`, ...
49pub mod weights; // NOTE(review): presumably checkpoint/weight loading — confirm
50
51pub use audiovae::AudioVae;
52pub use config::{
53    AudioVaeConfig, CfmConfig, LoraConfig, MiniCpm4Config, RopeScalingConfig, VoxCpm2Config,
54    VoxCpmDitConfig, VoxCpmEncoderConfig,
55};
56pub use error::{Error, Result};
57pub use voxcpm2::{
58    CancelToken, GenerateOptions, GenerateOptionsBuilder, GenerateStream, Prompt, PromptAudio,
59    VoxCPM,
60};