// adk_audio/lib.rs

//! # adk-audio
//!
//! Audio intelligence and pipeline orchestration for ADK-Rust agents.
//!
//! Provides unified traits for Text-to-Speech (TTS), Speech-to-Text (STT),
//! music generation, audio FX/DSP processing, and Voice Activity Detection (VAD),
//! with a composable pipeline system for building voice agent loops, podcast
//! production, transcription, and generative soundscapes.
//!
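//! ## Features
//!
//! - `tts` (default) — Cloud TTS providers (ElevenLabs, OpenAI, Gemini, Cartesia)
//! - `stt` (default) — Cloud STT providers (Whisper API, Deepgram, AssemblyAI, Gemini)
//! - `music` — Music generation providers
//! - `fx` — DSP processors (normalizer, resampler, noise, compressor)
//! - `vad` — Voice Activity Detection
//! - `mlx` — Local inference model loading (tokenizers + HF Hub, cross-platform)
//! - `onnx` — ONNX Runtime local inference (cross-platform)
//! - `livekit` — adk-realtime bridge
//! - `all` — All features (safe for any platform and CI)
//!
//! The crate root also gates items behind these additional feature flags:
//!
//! - `desktop-audio` — Desktop capture/playback and VAD turn management (`desktop` module)
//! - `qwen3-tts` — Native Qwen3 TTS provider
//! - `kokoro` — Kokoro preprocessor and voices (re-exported from `onnx`)
//! - `chatterbox` — Chatterbox TTS provider (re-exported from `onnx`)
//! - `whisper-onnx`, `distil-whisper`, `moonshine` — ONNX STT provider and backends
//!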
//! ## Quick Start
//!
//! ```rust,ignore
//! use adk_audio::{AudioPipelineBuilder, AudioFrame};
//!
//! let handle = AudioPipelineBuilder::new()
//!     .tts(my_tts_provider)
//!     .build_tts()?;
//! ```

32pub mod codec;
33pub mod error;
34pub mod frame;
35pub mod mixer;
36pub mod pipeline;
37pub mod providers;
38pub mod tools;
39pub mod traits;
40
// Feature-gated modules: each one is compiled only when the named Cargo
// feature is enabled.

// DSP processors.
#[cfg(feature = "fx")]
pub mod fx;

// Local model support behind the `mlx` feature.
#[cfg(feature = "mlx")]
pub mod mlx;

// ONNX Runtime support behind the `onnx` feature.
#[cfg(feature = "onnx")]
pub mod onnx;

// The module is named `bridge`, but the feature that enables it is `livekit`.
#[cfg(feature = "livekit")]
pub mod bridge;

#[cfg(feature = "desktop-audio")]
pub mod desktop;

// Surface the desktop audio types at the crate root under the same feature
// gate as the module that defines them.
#[cfg(feature = "desktop-audio")]
pub use desktop::{
    AudioCapture, AudioDevice, AudioPlayback, AudioStream, CaptureConfig, VadConfig, VadMode,
    VadTurnManager, VoiceActivityEvent,
};

63pub mod registry;
64
65// Re-exports
66pub use codec::{AudioFormat, decode, encode};
67pub use error::{AudioError, AudioResult};
68pub use frame::{AudioFrame, merge_frames};
69pub use mixer::Mixer;
70pub use pipeline::{
71    AudioPipelineBuilder, PipelineControl, PipelineHandle, PipelineInput, PipelineMetrics,
72    PipelineOutput, SentenceChunker,
73};
74pub use tools::{ApplyFxTool, GenerateMusicTool, SpeakTool, TranscribeTool};
75pub use traits::{
76    AudioProcessor, Emotion, FxChain, MusicProvider, MusicRequest, Speaker, SpeechSegment,
77    SttOptions, SttProvider, Transcript, TtsProvider, TtsRequest, VadProcessor, Voice, Word,
78};
79
// Feature-gated re-exports: each group is only visible at the crate root when
// the matching Cargo feature is enabled.

// Cloud TTS providers and their configuration types.
#[cfg(feature = "tts")]
pub use providers::tts::{
    CartesiaTts, CloudTtsConfig, ElevenLabsTts, GeminiTts, OpenAiTts, SpeakerConfig,
};

// NOTE(review): re-exports from `providers::tts` under a different feature than
// the group above — presumably that module is available whenever `qwen3-tts`
// is enabled; confirm against `providers` and Cargo.toml.
#[cfg(feature = "qwen3-tts")]
pub use providers::tts::{Qwen3TtsNativeProvider, Qwen3TtsVariant};

// Cloud STT providers.
#[cfg(feature = "stt")]
pub use providers::stt::{AssemblyAiStt, DeepgramStt, GeminiStt, WhisperApiStt};

// DSP processors from the `fx` module.
#[cfg(feature = "fx")]
pub use fx::{
    DynamicRangeCompressor, LoudnessNormalizer, NoiseSuppressor, PitchShifter, Resampler,
    SilenceTrimmer,
};

// Same gate as the `bridge` module declaration.
#[cfg(feature = "livekit")]
pub use bridge::RealtimeBridge;

// Same gate as the `mlx` module declaration.
#[cfg(feature = "mlx")]
pub use mlx::{MlxQuantization, MlxSttConfig, MlxSttProvider, MlxTtsConfig, MlxTtsProvider};

// ONNX re-exports. The base group shares the `onnx` gate with the module itself.
#[cfg(feature = "onnx")]
pub use onnx::{
    OnnxExecutionProvider, OnnxModelConfig, OnnxTtsProvider, Preprocessor, PreprocessorOutput,
    TokenizerPreprocessor,
};

// NOTE(review): the three groups below re-export from `onnx`, which is only
// declared when the `onnx` feature is on. Presumably `kokoro`, `chatterbox`,
// `whisper-onnx`, `distil-whisper` and `moonshine` each enable `onnx` in
// Cargo.toml — confirm, otherwise enabling one of them alone fails to compile.
#[cfg(feature = "kokoro")]
pub use onnx::{KokoroPreprocessor, KokoroVoices};

#[cfg(feature = "chatterbox")]
pub use onnx::{ChatterboxConfig, ChatterboxTtsProvider, ChatterboxVariant};

// Any one of the three STT features exposes the full set of ONNX STT types.
#[cfg(any(feature = "whisper-onnx", feature = "distil-whisper", feature = "moonshine"))]
pub use onnx::{
    DistilWhisperVariant, MoonshineVariant, OnnxSttConfig, OnnxSttConfigBuilder, OnnxSttProvider,
    SttBackend, WhisperModelSize,
};

122pub use registry::LocalModelRegistry;