Skip to main content

adk_audio/pipeline/
chunker.rs

//! Sentence-chunked streaming for voice agent pipelines.

3/// Buffers LLM tokens and emits complete sentences at delimiter boundaries.
4///
5/// This reduces time-to-first-audio by sending each sentence to TTS
6/// as soon as it's complete, rather than waiting for the full response.
7pub struct SentenceChunker {
8    buffer: String,
9    delimiters: Vec<char>,
10}
11
12impl SentenceChunker {
13    /// Create a new chunker with default delimiters (`.!?;\n`).
14    pub fn new() -> Self {
15        Self { buffer: String::new(), delimiters: vec!['.', '!', '?', ';', '\n'] }
16    }
17
18    /// Push a token and return any complete sentences.
19    pub fn push(&mut self, token: &str) -> Vec<String> {
20        self.buffer.push_str(token);
21        let mut sentences = Vec::new();
22        while let Some(pos) = self.buffer.chars().position(|c| self.delimiters.contains(&c)) {
23            let byte_pos = self
24                .buffer
25                .char_indices()
26                .nth(pos + 1)
27                .map(|(i, _)| i)
28                .unwrap_or(self.buffer.len());
29            let sentence: String = self.buffer.drain(..byte_pos).collect();
30            let trimmed = sentence.trim().to_string();
31            if !trimmed.is_empty() {
32                sentences.push(trimmed);
33            }
34        }
35        sentences
36    }
37
38    /// Flush the remaining buffer as a final sentence.
39    pub fn flush(&mut self) -> Option<String> {
40        let remaining = self.buffer.trim().to_string();
41        self.buffer.clear();
42        if remaining.is_empty() { None } else { Some(remaining) }
43    }
44}
45
46impl Default for SentenceChunker {
47    fn default() -> Self {
48        Self::new()
49    }
50}