Skip to main content

parakeet_rs/
lib.rs

1//! # parakeet-rs
2//!
3//! Rust bindings for NVIDIA's Parakeet speech recognition model using ONNX Runtime.
4//!
5//! Parakeet is a state-of-the-art automatic speech recognition (ASR) model developed by NVIDIA,
6//! based on the FastConformer-TDT architecture with 600 million parameters.
7//!
8//! ## Features
9//!
10//! - Easy-to-use API for speech-to-text transcription
11//! - Support for ONNX format models
12//! - 16kHz mono audio input
13//! - Punctuation and capitalization included in output
14//! - Fast inference using ONNX Runtime
15//!
16//! ## Quick Start
17//!
18//! ```ignore
19//! use parakeet_rs::{Parakeet, Transcriber, TimestampMode};
20//!
21//! // Load the model
22//! let mut parakeet = Parakeet::from_pretrained(".")?;
23//!
24//! // Transcribe audio samples (see examples/raw.rs for audio loading)
25//! let result = parakeet.transcribe_samples(audio, sample_rate, channels, Some(TimestampMode::Words))?;
26//! println!("Transcription: {}", result.text);
27//! ```
28//!
29//! ## Model Requirements
30//!
31//! Your model directory should contain:
32//! - `model.onnx` - The ONNX model file
33//! - `model.onnx_data` - External model weights
34//! - `config.json` - Model configuration
35//! - `preprocessor_config.json` - Audio preprocessing configuration
36//! - `tokenizer.json` - Tokenizer vocabulary
37//! - `tokenizer_config.json` - Tokenizer configuration
38//!
39//! ## Audio Requirements
40//!
41//! - Format: WAV
42//! - Sample Rate: 16kHz
43//! - Channels: Mono (stereo will be converted automatically)
44//! - Bit Depth: 16-bit PCM or 32-bit float
45
// Crate-private modules: none of these are declared `pub`, so code outside
// the crate can only use whatever the `pub use` re-exports expose from them.
46mod audio;
47mod config;
48mod decoder;
49mod decoder_tdt;
50mod error;
51mod execution;
52mod model;
53mod model_eou;
54mod model_nemotron;
55mod model_tdt;
56mod model_unified;
57mod nemotron;
58mod parakeet;
59mod parakeet_eou;
60mod parakeet_tdt;
61mod parakeet_unified;
// Feature-gated modules: each item below is compiled only when the Cargo
// feature named in its `#[cfg]` attribute is enabled. `sortformer`,
// `multitalker` and `cohere` are `pub mod`, so they are reachable by module
// path; the `model_multitalker` / `model_cohere` modules stay private.
62#[cfg(feature = "sortformer")]
63pub mod sortformer;
64#[cfg(feature = "multitalker")]
65mod model_multitalker;
66#[cfg(feature = "multitalker")]
67pub mod multitalker;
68#[cfg(feature = "cohere")]
69mod model_cohere;
70#[cfg(feature = "cohere")]
71pub mod cohere;
// Remaining private modules, compiled unconditionally.
72mod timestamps;
73mod transcriber;
74mod vocab;
75
// Public API surface: items from the private modules are re-exported here so
// that callers can import them directly from the crate root.
76pub use error::{Error, Result};
// `execution::ModelConfig` is exported under the name `ExecutionConfig`.
// `config` also has an item named `ModelConfig` (exported below as
// `ModelConfigJson`); the renames let both coexist at the crate root.
77pub use execution::{ExecutionProvider, ModelConfig as ExecutionConfig};
78pub use parakeet::Parakeet;
79pub use parakeet_tdt::ParakeetTDT;
80pub use timestamps::TimestampMode;
// Glob re-export: every public item of `transcriber` becomes part of the
// crate root. NOTE(review): presumably this is where the `Transcriber` name
// used in the crate-level Quick Start example comes from; confirm.
81pub use transcriber::*;
82
// `config::ModelConfig` is exported as `ModelConfigJson` (see the
// `ExecutionConfig` rename above for the other `ModelConfig`).
83pub use config::{ModelConfig as ModelConfigJson, PreprocessorConfig};
84
// Decoder, model, and streaming/handle items re-exported by name.
85pub use decoder::{ParakeetDecoder, TimedToken, TranscriptionResult};
86pub use model::ParakeetModel;
87pub use model_eou::ParakeetEOUModel;
88pub use model_nemotron::{NemotronEncoderCache, NemotronModel, NemotronModelConfig};
89pub use model_unified::{ParakeetUnifiedModel, UnifiedModelConfig};
90pub use nemotron::{Nemotron, NemotronHandle, SentencePieceVocab};
91pub use parakeet_eou::{ParakeetEOU, ParakeetEOUHandle};
92pub use parakeet_unified::{ParakeetUnified, ParakeetUnifiedHandle, UnifiedStreamingConfig};
93
// Root-level re-exports that exist only when the `multitalker` feature is on.
94#[cfg(feature = "multitalker")]
95pub use multitalker::{LatencyMode, MultitalkerASR, MultitalkerConfig, SpeakerTranscript, WordTimestamp};
96
// Root-level re-export that exists only when the `cohere` feature is on.
97#[cfg(feature = "cohere")]
98pub use cohere::CohereASR;