wavekat_turn/audio/pipecat.rs
1//! Pipecat Smart Turn v3 backend.
2//!
3//! Audio-based turn detection using the Smart Turn ONNX model.
4//! Expects 16 kHz f32 PCM input. Telephony audio at 8 kHz must be
5//! upsampled before feeding to this detector.
6//!
7//! - Model size: ~8 MB (int8 quantized ONNX)
8//! - Inference: ~12 ms on CPU
9//! - License: BSD 2-Clause
10
11use crate::{AudioFrame, AudioTurnDetector, TurnError, TurnPrediction};
12
/// Pipecat Smart Turn v3 detector.
///
/// Planned design: the detector buffers up to 8 seconds of audio internally.
/// When [`predict`](AudioTurnDetector::predict) is called, it takes the last
/// 8 s of that audio (zero-padded at the front if less has been pushed),
/// extracts Whisper log-mel features, and runs ONNX inference.
///
/// **Status:** this type is currently a placeholder. It declares no fields
/// yet, and its constructor and [`AudioTurnDetector`] methods are `todo!`
/// stubs that panic when called.
pub struct PipecatSmartTurn {
    // TODO: ONNX session + audio ring buffer + state
    // (none of these fields exist yet; the struct is empty until they are added)
}
21
impl PipecatSmartTurn {
    /// Create a new Smart Turn detector, loading the ONNX model.
    ///
    /// # Errors
    ///
    /// The signature allows a [`TurnError`] to be returned.
    /// NOTE(review): presumably this will report model-loading failures;
    /// the concrete error cases are not defined yet — confirm once implemented.
    ///
    /// # Panics
    ///
    /// Always panics at present: the body is a `todo!` placeholder, so
    /// neither `Ok` nor `Err` is ever produced.
    pub fn new() -> Result<Self, TurnError> {
        todo!("load Smart Turn v3 ONNX model")
    }
}
28
/// [`AudioTurnDetector`] implementation for the Smart Turn backend.
///
/// Every method here is still a `todo!` placeholder and panics when called.
/// The per-method notes describe the planned behavior named in each
/// placeholder message, not behavior that exists today.
impl AudioTurnDetector for PipecatSmartTurn {
    /// Planned: append the frame to the internal ring buffer.
    ///
    /// The parameter is named `_frame` because it is unused until the
    /// buffer exists.
    ///
    /// # Panics
    ///
    /// Always panics for now (`todo!`).
    fn push_audio(&mut self, _frame: &AudioFrame) {
        todo!("append to ring buffer")
    }

    /// Planned: truncate or pad the buffered audio to 8 s, extract mel
    /// features, and run ONNX inference to produce a [`TurnPrediction`].
    ///
    /// # Errors
    ///
    /// The signature allows a [`TurnError`] to be returned.
    /// NOTE(review): the failure cases are not defined yet — confirm once
    /// implemented.
    ///
    /// # Panics
    ///
    /// Always panics for now (`todo!`).
    fn predict(&mut self) -> Result<TurnPrediction, TurnError> {
        todo!("truncate/pad to 8s, extract mel features, run ONNX inference")
    }

    /// Planned: clear the ring buffer and any other internal state.
    ///
    /// # Panics
    ///
    /// Always panics for now (`todo!`).
    fn reset(&mut self) {
        todo!("clear ring buffer and internal state")
    }
}