Skip to main content

gemini_adk_rs/live/
transcript.rs

//! Transcript accumulation buffer for Live sessions.
//!
//! Automatically accumulates input/output transcripts per-turn with windowing
//! support for OOB extraction pipelines.
5
6use std::collections::VecDeque;
7use std::fmt::Write;
8use std::time::Instant;
9
/// Summary of a tool call within a conversation turn.
///
/// Holds the tool name plus shortened JSON renderings of its arguments and
/// result, so a turn can be logged or formatted without the full payloads.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolCallSummary {
    /// Name of the tool that was called.
    pub name: String,
    /// First 200 chars of JSON args.
    pub args_summary: String,
    /// First 200 chars of JSON result.
    pub result_summary: String,
}
20
21/// A single completed conversation turn with accumulated transcripts.
22#[derive(Debug, Clone)]
23pub struct TranscriptTurn {
24    /// Sequential turn number (0-based).
25    pub turn_number: u32,
26    /// Accumulated user (input) transcript for this turn.
27    pub user: String,
28    /// Accumulated model (output) transcript for this turn.
29    pub model: String,
30    /// Tool calls that occurred during this turn.
31    pub tool_calls: Vec<ToolCallSummary>,
32    /// When this turn was finalized.
33    pub timestamp: Instant,
34}
35
/// Default maximum number of completed turns retained in the ring buffer.
const DEFAULT_MAX_TURNS: usize = 50;
38
39/// Accumulates input/output transcripts and segments them by turn boundaries.
40///
41/// Uses a ring buffer (`VecDeque`) that evicts the oldest turns when
42/// `max_turns` is reached. This prevents unbounded memory growth in
43/// long-running voice sessions.
44///
45/// Thread safety: wrap in `Arc<parking_lot::Mutex<TranscriptBuffer>>` when
46/// sharing between fast lane (push) and control lane (end_turn / window).
47#[derive(Debug)]
48pub struct TranscriptBuffer {
49    turns: VecDeque<TranscriptTurn>,
50    current_user: String,
51    current_model: String,
52    tool_calls_pending: Vec<ToolCallSummary>,
53    turn_count: u32,
54    max_turns: usize,
55}
56
/// Truncate a string to at most `max_chars` characters, reusing the original when possible.
///
/// Characters are counted as Unicode scalar values (`char`s), not bytes, so the
/// cut always lands on a valid UTF-8 boundary. The input's allocation is reused;
/// no new `String` is created.
fn truncate_string(mut s: String, max_chars: usize) -> String {
    // Fast path: every char occupies at least one byte, so a string of at most
    // `max_chars` bytes cannot contain more than `max_chars` chars.
    if s.len() <= max_chars {
        return s;
    }
    // Byte offset at which the char with index `max_chars` starts. `None` means
    // the string is longer in bytes but has at most `max_chars` chars, so it is
    // returned untouched.
    if let Some((cut, _)) = s.char_indices().nth(max_chars) {
        s.truncate(cut);
    }
    s
}
68
69impl TranscriptBuffer {
70    /// Create a new transcript buffer with the default capacity.
71    pub fn new() -> Self {
72        Self::with_capacity(DEFAULT_MAX_TURNS)
73    }
74
75    /// Create a buffer with a custom maximum turn capacity.
76    ///
77    /// When the buffer reaches `max_turns` completed turns, the oldest
78    /// turn is evicted on each new `end_turn()`.
79    pub fn with_capacity(max_turns: usize) -> Self {
80        Self {
81            turns: VecDeque::with_capacity(max_turns.min(64)),
82            current_user: String::new(),
83            current_model: String::new(),
84            tool_calls_pending: Vec::new(),
85            turn_count: 0,
86            max_turns,
87        }
88    }
89
90    /// Append input (user speech) transcript text.
91    pub fn push_input(&mut self, text: &str) {
92        self.current_user.push_str(text);
93    }
94
95    /// Append output (model speech) transcript text.
96    pub fn push_output(&mut self, text: &str) {
97        self.current_model.push_str(text);
98    }
99
100    /// Record a tool call summary for the current turn.
101    ///
102    /// Args and result are truncated to 200 characters of their JSON representation.
103    pub fn push_tool_call(
104        &mut self,
105        name: String,
106        args: &serde_json::Value,
107        result: &serde_json::Value,
108    ) {
109        let args_str = serde_json::to_string(args).unwrap_or_default();
110        let result_str = serde_json::to_string(result).unwrap_or_default();
111        self.tool_calls_pending.push(ToolCallSummary {
112            name,
113            args_summary: truncate_string(args_str, 200),
114            result_summary: truncate_string(result_str, 200),
115        });
116    }
117
118    /// Finalize the current turn and return it.
119    ///
120    /// Resets the current accumulators for the next turn.
121    /// Only creates a turn if there is any transcript content.
122    pub fn end_turn(&mut self) -> Option<TranscriptTurn> {
123        if self.current_user.is_empty()
124            && self.current_model.is_empty()
125            && self.tool_calls_pending.is_empty()
126        {
127            return None;
128        }
129
130        let turn = TranscriptTurn {
131            turn_number: self.turn_count,
132            user: std::mem::take(&mut self.current_user),
133            model: std::mem::take(&mut self.current_model),
134            tool_calls: std::mem::take(&mut self.tool_calls_pending),
135            timestamp: Instant::now(),
136        };
137        self.turn_count += 1;
138        // Evict oldest turn if at capacity
139        if self.turns.len() >= self.max_turns {
140            self.turns.pop_front();
141        }
142        self.turns.push_back(turn);
143        Some(self.turns.back().unwrap().clone())
144    }
145
146    /// Get the last `n` completed turns as a contiguous slice.
147    ///
148    /// Requires `&mut self` to ensure VecDeque contiguity.
149    pub fn window(&mut self, n: usize) -> &[TranscriptTurn] {
150        let slice = self.turns.make_contiguous();
151        let start = slice.len().saturating_sub(n);
152        &slice[start..]
153    }
154
155    /// All completed turns as a contiguous slice.
156    ///
157    /// Requires `&mut self` to ensure VecDeque contiguity.
158    pub fn all_turns(&mut self) -> &[TranscriptTurn] {
159        self.turns.make_contiguous()
160    }
161
162    /// Number of retained turns (may be less than `turn_count` due to eviction).
163    pub fn retained_count(&self) -> usize {
164        self.turns.len()
165    }
166
167    /// Number of completed turns.
168    pub fn turn_count(&self) -> u32 {
169        self.turn_count
170    }
171
172    /// Format the last `n` turns as a human-readable transcript for LLM consumption.
173    pub fn format_window(&mut self, n: usize) -> String {
174        let window = self.window(n);
175        let mut out = String::new();
176        for turn in window {
177            if !turn.user.is_empty() {
178                let _ = writeln!(out, "User: {}", turn.user.trim());
179            }
180            for tc in &turn.tool_calls {
181                let _ = writeln!(
182                    out,
183                    "[Tool: {}({}) \u{2192} {}]",
184                    tc.name, tc.args_summary, tc.result_summary
185                );
186            }
187            if !turn.model.is_empty() {
188                let _ = writeln!(out, "Assistant: {}", turn.model.trim());
189            }
190            let _ = writeln!(out);
191        }
192        out
193    }
194
195    /// Set server-provided input transcription for current turn.
196    /// Overwrites client-accumulated input if server transcription is available.
197    pub fn set_input_transcription(&mut self, text: &str) {
198        self.current_user.clear();
199        self.current_user.push_str(text);
200    }
201
202    /// Set server-provided output transcription for current turn.
203    pub fn set_output_transcription(&mut self, text: &str) {
204        self.current_model.clear();
205        self.current_model.push_str(text);
206    }
207
208    /// Truncate the current model turn in progress. Called on interruption.
209    /// Only what was already delivered to the client is retained.
210    pub fn truncate_current_model_turn(&mut self) {
211        self.current_model.clear();
212    }
213
214    /// Whether there is any pending (un-finalized) transcript content.
215    pub fn has_pending(&self) -> bool {
216        !self.current_user.is_empty()
217            || !self.current_model.is_empty()
218            || !self.tool_calls_pending.is_empty()
219    }
220
221    /// Create a `TranscriptWindow` snapshot of the last `n` completed turns.
222    ///
223    /// This is a cheap clone operation designed for passing to phase callbacks.
224    pub fn snapshot_window(&mut self, n: usize) -> TranscriptWindow {
225        TranscriptWindow::new(self.window(n).to_vec())
226    }
227
228    /// Snapshot including the current in-progress turn (not yet finalized).
229    ///
230    /// Used by `GenerationComplete` extractors to see the model's full output
231    /// before interruption truncation clears `current_model`.
232    pub fn snapshot_window_with_current(&mut self, n: usize) -> TranscriptWindow {
233        let mut turns: Vec<TranscriptTurn> = self.window(n).to_vec();
234        if self.has_pending() {
235            turns.push(TranscriptTurn {
236                turn_number: self.turn_count,
237                user: self.current_user.clone(),
238                model: self.current_model.clone(),
239                tool_calls: self.tool_calls_pending.clone(),
240                timestamp: std::time::Instant::now(),
241            });
242        }
243        TranscriptWindow::new(turns)
244    }
245}
246
247/// A read-only snapshot of recent transcript turns for context construction.
248///
249/// Cheap to create (clone of ~5 small structs). Used by `on_enter_context`
250/// callbacks to reference recent conversation without holding the buffer lock.
251#[derive(Debug, Clone)]
252pub struct TranscriptWindow {
253    turns: Vec<TranscriptTurn>,
254}
255
256impl TranscriptWindow {
257    /// Create a window from a vec of turns.
258    pub fn new(turns: Vec<TranscriptTurn>) -> Self {
259        Self { turns }
260    }
261
262    /// The turns in this window.
263    pub fn turns(&self) -> &[TranscriptTurn] {
264        &self.turns
265    }
266
267    /// Format all turns as human-readable text for LLM consumption.
268    pub fn formatted(&self) -> String {
269        use std::fmt::Write as _;
270        let mut out = String::new();
271        for turn in &self.turns {
272            if !turn.user.is_empty() {
273                let _ = writeln!(out, "User: {}", turn.user.trim());
274            }
275            for tc in &turn.tool_calls {
276                let _ = writeln!(
277                    out,
278                    "[Tool: {}({}) \u{2192} {}]",
279                    tc.name, tc.args_summary, tc.result_summary
280                );
281            }
282            if !turn.model.is_empty() {
283                let _ = writeln!(out, "Assistant: {}", turn.model.trim());
284            }
285            let _ = writeln!(out);
286        }
287        out
288    }
289
290    /// Last user utterance, if any.
291    pub fn last_user(&self) -> Option<&str> {
292        self.turns
293            .iter()
294            .rev()
295            .find(|t| !t.user.is_empty())
296            .map(|t| t.user.as_str())
297    }
298
299    /// Last model utterance, if any.
300    pub fn last_model(&self) -> Option<&str> {
301        self.turns
302            .iter()
303            .rev()
304            .find(|t| !t.model.is_empty())
305            .map(|t| t.model.as_str())
306    }
307
308    /// Number of turns in this window.
309    pub fn len(&self) -> usize {
310        self.turns.len()
311    }
312
313    /// Whether the window is empty.
314    pub fn is_empty(&self) -> bool {
315        self.turns.is_empty()
316    }
317}
318
319impl Default for TranscriptBuffer {
320    fn default() -> Self {
321        Self::new()
322    }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn accumulate_and_end_turn() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("Hello ");
        buf.push_input("there");
        buf.push_output("Hi! How can I help?");
        let turn = buf.end_turn().expect("should produce a turn");
        assert_eq!(turn.turn_number, 0);
        assert_eq!(turn.user, "Hello there");
        assert_eq!(turn.model, "Hi! How can I help?");
        assert_eq!(buf.turn_count(), 1);
    }

    #[test]
    fn end_turn_empty_returns_none() {
        let mut buf = TranscriptBuffer::new();
        assert!(buf.end_turn().is_none());
    }

    #[test]
    fn window_returns_last_n() {
        let mut buf = TranscriptBuffer::new();
        for i in 0..5 {
            buf.push_input(&format!("user-{i}"));
            buf.push_output(&format!("model-{i}"));
            buf.end_turn();
        }
        let w = buf.window(3);
        assert_eq!(w.len(), 3);
        assert_eq!(w[0].turn_number, 2);
        assert_eq!(w[1].turn_number, 3);
        assert_eq!(w[2].turn_number, 4);
    }

    #[test]
    fn window_larger_than_turns() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("only turn");
        buf.end_turn();
        let w = buf.window(10);
        assert_eq!(w.len(), 1);
    }

    #[test]
    fn format_window_produces_readable_text() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("What's the weather?");
        buf.push_output("It's sunny and 22 degrees.");
        buf.end_turn();
        buf.push_input("And tomorrow?");
        buf.push_output("Rain expected.");
        buf.end_turn();

        let formatted = buf.format_window(2);
        assert!(formatted.contains("User: What's the weather?"));
        assert!(formatted.contains("Assistant: It's sunny and 22 degrees."));
        assert!(formatted.contains("User: And tomorrow?"));
        assert!(formatted.contains("Assistant: Rain expected."));
    }

    #[test]
    fn has_pending() {
        let mut buf = TranscriptBuffer::new();
        assert!(!buf.has_pending());
        buf.push_input("hello");
        assert!(buf.has_pending());
        buf.end_turn();
        assert!(!buf.has_pending());
    }

    #[test]
    fn set_input_transcription_overwrites_accumulated() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("partial ");
        buf.push_input("input");
        // Server provides authoritative transcription
        buf.set_input_transcription("server transcription");
        let turn = buf.end_turn().expect("should produce a turn");
        assert_eq!(turn.user, "server transcription");
    }

    #[test]
    fn set_output_transcription_overwrites_accumulated() {
        let mut buf = TranscriptBuffer::new();
        buf.push_output("partial ");
        buf.push_output("output");
        // Server provides authoritative transcription
        buf.set_output_transcription("server output");
        let turn = buf.end_turn().expect("should produce a turn");
        assert_eq!(turn.model, "server output");
    }

    #[test]
    fn truncate_current_model_turn_clears_model_text() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("user said something");
        buf.push_output("model was saying something but got");
        // Interruption happens
        buf.truncate_current_model_turn();
        assert!(buf.has_pending()); // user text is still there
        let turn = buf.end_turn().expect("should produce a turn");
        assert_eq!(turn.user, "user said something");
        assert_eq!(turn.model, ""); // model output was truncated
    }

    #[test]
    fn multiple_turns_all_tracked() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("a");
        buf.end_turn();
        buf.push_output("b");
        buf.end_turn();
        buf.push_input("c");
        buf.push_output("d");
        buf.end_turn();
        assert_eq!(buf.all_turns().len(), 3);
        assert_eq!(buf.turn_count(), 3);
    }

    #[test]
    fn push_tool_call_records_summary() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("check weather");
        buf.push_tool_call(
            "get_weather".to_string(),
            &serde_json::json!({"city": "London"}),
            &serde_json::json!({"temp": 22, "condition": "sunny"}),
        );
        buf.push_output("It's sunny in London.");
        let turn = buf.end_turn().expect("should produce a turn");
        assert_eq!(turn.tool_calls.len(), 1);
        assert_eq!(turn.tool_calls[0].name, "get_weather");
        assert!(turn.tool_calls[0].args_summary.contains("London"));
        assert!(turn.tool_calls[0].result_summary.contains("sunny"));
    }

    #[test]
    fn push_tool_call_truncates_long_args() {
        let mut buf = TranscriptBuffer::new();
        let long_value = "x".repeat(500);
        buf.push_input("do something");
        buf.push_tool_call(
            "big_tool".to_string(),
            &serde_json::json!({"data": long_value}),
            &serde_json::json!({"ok": true}),
        );
        let turn = buf.end_turn().expect("should produce a turn");
        assert!(turn.tool_calls[0].args_summary.chars().count() <= 200);
    }

    #[test]
    fn truncate_string_counts_chars_not_bytes() {
        // Multi-byte characters must be cut on a char boundary.
        assert_eq!(
            truncate_string("\u{e9}\u{e9}\u{e9}\u{e9}\u{e9}".to_string(), 2),
            "\u{e9}\u{e9}"
        );
        // 5 chars but 6 bytes: exceeds the byte fast path yet fits the char limit.
        assert_eq!(truncate_string("h\u{e9}llo".to_string(), 5), "h\u{e9}llo");
        assert_eq!(truncate_string("abc".to_string(), 0), "");
    }

    #[test]
    fn multiple_tool_calls_in_one_turn() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("plan my trip");
        buf.push_tool_call(
            "get_weather".to_string(),
            &serde_json::json!({"city": "Paris"}),
            &serde_json::json!({"temp": 18}),
        );
        buf.push_tool_call(
            "get_flights".to_string(),
            &serde_json::json!({"from": "NYC", "to": "Paris"}),
            &serde_json::json!({"price": 450}),
        );
        buf.push_output("Here's your trip plan.");
        let turn = buf.end_turn().expect("should produce a turn");
        assert_eq!(turn.tool_calls.len(), 2);
        assert_eq!(turn.tool_calls[0].name, "get_weather");
        assert_eq!(turn.tool_calls[1].name, "get_flights");
    }

    #[test]
    fn tool_calls_appear_in_format_window() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("What's the weather?");
        buf.push_tool_call(
            "get_weather".to_string(),
            &serde_json::json!({"city": "London"}),
            &serde_json::json!({"temp": 22}),
        );
        buf.push_output("It's 22 degrees in London.");
        buf.end_turn();

        let formatted = buf.format_window(1);
        assert!(formatted.contains("User: What's the weather?"));
        assert!(formatted.contains("[Tool: get_weather("));
        assert!(formatted.contains("London"));
        assert!(formatted.contains("\u{2192}"));
        assert!(formatted.contains("22"));
        assert!(formatted.contains("Assistant: It's 22 degrees in London."));
    }

    #[test]
    fn tool_call_only_turn_creates_turn() {
        let mut buf = TranscriptBuffer::new();
        // A turn with only a tool call and no user/model text
        buf.push_tool_call(
            "ping".to_string(),
            &serde_json::json!({}),
            &serde_json::json!({"pong": true}),
        );
        assert!(buf.has_pending());
        let turn = buf
            .end_turn()
            .expect("tool-call-only turn should be created");
        assert_eq!(turn.tool_calls.len(), 1);
        assert_eq!(turn.user, "");
        assert_eq!(turn.model, "");
    }

    #[test]
    fn snapshot_window_creates_window() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("Hello");
        buf.push_output("Hi there!");
        buf.end_turn();
        buf.push_input("How are you?");
        buf.push_output("I'm good!");
        buf.end_turn();

        let window = buf.snapshot_window(5);
        assert_eq!(window.len(), 2);
        assert_eq!(window.last_user(), Some("How are you?"));
        assert_eq!(window.last_model(), Some("I'm good!"));
        assert!(!window.is_empty());
    }

    #[test]
    fn snapshot_window_with_current_includes_pending_turn() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("first");
        buf.push_output("reply");
        buf.end_turn();
        buf.push_input("second");
        buf.push_output("partial reply");

        let window = buf.snapshot_window_with_current(5);
        assert_eq!(window.len(), 2);
        assert_eq!(window.turns()[0].turn_number, 0);
        assert_eq!(window.turns()[1].turn_number, 1);
        assert_eq!(window.last_user(), Some("second"));
        assert_eq!(window.last_model(), Some("partial reply"));

        // Taking the snapshot must not consume or finalize the pending turn.
        assert!(buf.has_pending());
        assert_eq!(buf.turn_count(), 1);
        assert_eq!(buf.retained_count(), 1);
    }

    #[test]
    fn snapshot_window_with_current_without_pending_matches_completed() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("only");
        buf.end_turn();

        let window = buf.snapshot_window_with_current(5);
        assert_eq!(window.len(), 1);
        assert_eq!(window.last_user(), Some("only"));
        assert_eq!(window.last_model(), None);
    }

    #[test]
    fn transcript_window_formatted() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("What's the weather?");
        buf.push_output("It's sunny.");
        buf.end_turn();

        let window = buf.snapshot_window(1);
        let formatted = window.formatted();
        assert!(formatted.contains("User: What's the weather?"));
        assert!(formatted.contains("Assistant: It's sunny."));
    }

    #[test]
    fn transcript_window_empty() {
        let mut buf = TranscriptBuffer::new();
        let window = buf.snapshot_window(5);
        assert!(window.is_empty());
        assert_eq!(window.len(), 0);
        assert_eq!(window.last_user(), None);
        assert_eq!(window.last_model(), None);
    }

    #[test]
    fn ring_cap_evicts_oldest() {
        let mut buf = TranscriptBuffer::with_capacity(3);
        for i in 0..5 {
            buf.push_input(&format!("user-{i}"));
            buf.push_output(&format!("model-{i}"));
            buf.end_turn();
        }
        // Only last 3 retained
        assert_eq!(buf.retained_count(), 3);
        assert_eq!(buf.turn_count(), 5);
        let turns = buf.all_turns();
        assert_eq!(turns[0].turn_number, 2);
        assert_eq!(turns[1].turn_number, 3);
        assert_eq!(turns[2].turn_number, 4);
    }

    #[test]
    fn ring_cap_window_within_retained() {
        let mut buf = TranscriptBuffer::with_capacity(4);
        for i in 0..10 {
            buf.push_input(&format!("u{i}"));
            buf.end_turn();
        }
        let w = buf.window(2);
        assert_eq!(w.len(), 2);
        assert_eq!(w[0].turn_number, 8);
        assert_eq!(w[1].turn_number, 9);
    }

    #[test]
    fn default_capacity_is_50() {
        let buf = TranscriptBuffer::new();
        assert_eq!(buf.max_turns, DEFAULT_MAX_TURNS);
    }

    #[test]
    fn tool_calls_reset_after_end_turn() {
        let mut buf = TranscriptBuffer::new();
        buf.push_input("turn 1");
        buf.push_tool_call(
            "tool_a".to_string(),
            &serde_json::json!({"x": 1}),
            &serde_json::json!({"y": 2}),
        );
        buf.end_turn();

        buf.push_input("turn 2");
        buf.push_output("no tools this time");
        let turn2 = buf.end_turn().expect("should produce turn 2");
        assert!(turn2.tool_calls.is_empty());

        // Verify turn 1 still has its tool call
        assert_eq!(buf.all_turns()[0].tool_calls.len(), 1);
    }
}
630
631        // Verify turn 1 still has its tool call
632        assert_eq!(buf.all_turns()[0].tool_calls.len(), 1);
633    }
634}