parakeet_rs/lib.rs
1//! # parakeet-rs
2//!
3//! Rust bindings for NVIDIA's Parakeet speech recognition model using ONNX Runtime.
4//!
5//! Parakeet is a state-of-the-art automatic speech recognition (ASR) model developed by NVIDIA,
6//! based on the FastConformer-TDT architecture with 600 million parameters.
7//!
8//! ## Features
9//!
10//! - Easy-to-use API for speech-to-text transcription
11//! - Support for ONNX format models
12//! - 16 kHz audio input (mono; stereo is converted to mono automatically)
13//! - Punctuation and capitalization included in output
14//! - Fast inference using ONNX Runtime
15//!
16//! ## Quick Start
17//!
18//! ```ignore
19//! use parakeet_rs::{Parakeet, Transcriber, TimestampMode};
20//!
21//! // Load the model
22//! let mut parakeet = Parakeet::from_pretrained(".")?;
23//!
24//! // Transcribe audio samples (see examples/raw.rs for audio loading)
25//! let result = parakeet.transcribe_samples(audio, sample_rate, channels, Some(TimestampMode::Words))?;
26//! println!("Transcription: {}", result.text);
27//! ```
28//!
29//! ## Model Requirements
30//!
31//! Your model directory should contain:
32//! - `model.onnx` - The ONNX model file
33//! - `model.onnx_data` - External model weights
34//! - `config.json` - Model configuration
35//! - `preprocessor_config.json` - Audio preprocessing configuration
36//! - `tokenizer.json` - Tokenizer vocabulary
37//! - `tokenizer_config.json` - Tokenizer configuration
38//!
39//! ## Audio Requirements
40//!
41//! - Format: WAV
42//! - Sample Rate: 16kHz
43//! - Channels: Mono (stereo will be converted automatically)
44//! - Bit Depth: 16-bit PCM or 32-bit float
45
// Internal modules. Every module here is private except `sortformer`, so the
// public items they define are reachable by users only through the `pub use`
// re-exports at the crate root.
46mod audio;
47mod config;
48mod decoder;
49mod decoder_tdt;
50mod error;
51mod execution;
52mod model;
53mod model_eou;
54mod model_nemotron;
55mod model_tdt;
56mod nemotron;
57mod parakeet;
58mod parakeet_eou;
59mod parakeet_tdt;
// `sortformer` is compiled only when the `sortformer` feature is enabled, and
// it is the one module exposed as a public path (`crate::sortformer`).
60#[cfg(feature = "sortformer")]
61pub mod sortformer;
62mod timestamps;
63mod transcriber;
64mod vocab;
65
// Public API surface: items re-exported at the crate root from the private
// modules declared in this file.
66pub use error::{Error, Result};
// `execution::ModelConfig` and `config::ModelConfig` share a name, so each is
// re-exported under a distinct alias (`ExecutionConfig` here, `ModelConfigJson`
// further down).
67pub use execution::{ExecutionProvider, ModelConfig as ExecutionConfig};
68pub use parakeet::Parakeet;
69pub use parakeet_tdt::ParakeetTDT;
70pub use timestamps::TimestampMode;
// Glob re-export: every public item of `transcriber` appears at the crate root.
71pub use transcriber::*;
72
73pub use config::{ModelConfig as ModelConfigJson, PreprocessorConfig};
74
75pub use decoder::{ParakeetDecoder, TimedToken, TranscriptionResult};
76pub use model::ParakeetModel;
77pub use model_eou::ParakeetEOUModel;
78pub use model_nemotron::{NemotronEncoderCache, NemotronModel, NemotronModelConfig};
79pub use nemotron::{Nemotron, SentencePieceVocab};
80pub use parakeet_eou::ParakeetEOU;
// NOTE(review): nothing is re-exported from `audio`, `decoder_tdt`, `model_tdt`
// or `vocab` in the visible portion of this file — confirm that is intentional
// (e.g. `model`, `model_eou` and `model_nemotron` each expose a model type).