1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
//! # Voxudio
//!
//! `voxudio` is a real-time audio processing library with ONNX runtime support.
//! It provides a set of tools for audio device management, signal processing,
//! and machine learning model integration for audio applications.
//!
//! ## Features
//!
//! - Audio device enumeration and management
//! - Real-time audio processing capabilities
//! - ONNX model integration for audio machine learning tasks
//! - Opus audio codec support (encoding and decoding)
//! - Online feature extraction (FBank, MFCC, Whisper FBank) based on kaldi-native-fbank
//!   - Builder pattern with `with_*` methods for flexible parameter configuration (e.g., number of mel bins, window type)
//! - Automatic Speech Recognition (ASR) API
//!   - Provides `AutomaticSpeechRecognizer` for direct feature-to-text recognition
//! - All public APIs are documented with usage examples
//! - Cross-platform support
//!
//! ## Example
//!
//! ### Speaker embedding extraction example
//! ```rust,no_run
//! use voxudio::*;
//! use anyhow::Result;
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     // Initialize voice activity detector and speaker embedding extractor
//!     let mut vad = VoiceActivityDetector::new("checkpoint/voice_activity_detector.onnx")?;
//!     let mut see = SpeakerEmbeddingExtractor::new("checkpoint/speaker_embedding_extractor.onnx")?;
//!
//!     // Load audio file
//!     let (audio, channels) = load_audio::<22050, f32, _>("../asset/test.wav", false).await?;
//!
//!     // Detect speech segments
//!     let vad_audio = vad.retain_speech_only::<22050>(&audio, channels).await?;
//!
//!     // Extract speaker embedding
//!     let embedding = see.extract(&vad_audio, channels).await?;
//!     println!("Extracted embedding: {:?}", embedding);
//!
//!     Ok(())
//! }
//! ```
//!
//! ### Online feature extraction example
//! ```rust,no_run
//! use voxudio::*;
//! use anyhow::Result;
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     // Build an online FBank feature extractor (algorithm from kaldi-native-fbank)
//!     let extractor = OnlineFbankFeatureExtractor::fbank()?
//!         .with_energy_floor(1.0)
//!         .build()?;
//!     // Load audio file
//!     let (audio, channels) = load_audio::<16000, f32, _>("../asset/test.wav", true).await?;
//!     // Extract FBank features
//!     let features = extractor.extract::<16000>(&audio);
//!     println!("FBank features: {:?}", features);
//!
//!     Ok(())
//! }
//! ```
//!
//! ### Automatic Speech Recognition example
//! ```rust,no_run
//! use voxudio::*;
//! use anyhow::Result;
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let mut asr = AutomaticSpeechRecognizer::new_legacy("checkpoint/automatic_speech_recognizer.onnx")?;
//!     let features = vec![0.0; AutomaticSpeechRecognizer::NUM_BINS as usize * 10]; // Assume features are extracted
//!     let text = asr.recognize(&features).await?;
//!     println!("{}", text);
//!     Ok(())
//! }
//! ```
//! See `examples` for more usage.
//!
//! ## License
//!
//! This project is licensed under the Apache License, Version 2.0.
pub use *;
pub use *;
pub use *;
pub use *;
pub use *;
pub use ;