Skip to main content

atomr_agents_stt_core/
lib.rs

1//! Core types for the atomr-agents speech-to-text capability.
2//!
3//! This crate is intentionally I/O-free: it defines the
4//! [`SpeechToText`] and [`StreamingSession`] traits, the rich
5//! [`Capabilities`] struct that backends advertise via a
6//! `pub const`, the audio-input and transcript data types, and a
7//! deterministic [`MockSpeechToText`] for tests.
8//!
9//! Concrete backends live in sibling crates:
10//!
11//! - `atomr-agents-stt-runtime-openai` — OpenAI Whisper REST.
12//! - `atomr-agents-stt-runtime-deepgram` — Deepgram REST + WS.
13//! - `atomr-agents-stt-runtime-assemblyai` — AssemblyAI REST + WS.
14//! - `atomr-agents-stt-runtime-whisper` — local whisper-rs.
15//!
16//! Audio I/O (`symphonia`, `cpal`) lives in `atomr-agents-stt-audio`,
17//! the higher-level voice-session abstraction in
18//! `atomr-agents-stt-voice`, and the agent-framework adapters in
19//! `atomr-agents-stt-tool`.
20
// Implementation modules. None of them is declared `pub`, so they are
// private to this crate; the items intended for callers are surfaced by
// the `pub use` re-exports in this file.
21mod audio;
22mod capabilities;
23mod error;
24mod kinds;
25mod mock;
26mod stream;
// Spelled `trait_` with a trailing underscore because `trait` is a
// reserved keyword and cannot be used as a module name directly.
27mod trait_;
28mod transcript;
29
// Flat public API: selected items from each private module are re-exported
// at the crate root, so callers import them from the crate root without
// naming the module they live in.
30pub use audio::{AudioFormat, AudioInput, PcmBuffer, SampleType};
31pub use capabilities::{Capabilities, DiarizationSupport, Languages};
// NOTE(review): `Result` here is a crate-level item that will shadow
// `std::result::Result` for anyone glob-importing this crate; presumably
// it is an alias specialised to `SttError` — confirm in the `error` module.
32pub use error::{Result, SttError};
33pub use kinds::{BackendKind, TransportKind};
// Deterministic mock backend that the crate docs describe as being for tests.
34pub use mock::MockSpeechToText;
35pub use stream::{StreamEvent, StreamOptions, StreamingSession};
36pub use trait_::{DynSpeechToText, SpeechToText, TranscribeOptions};
37pub use transcript::{Segment, SpeakerTag, Transcript, Word};