Skip to main content

opencode_voice/app/
recording.rs

1//! Recording session management — async input event handlers for the recording state machine.
2//!
3//! These functions are called from the main event loop in [`super`] when
4//! keyboard or hotkey input events arrive.  They implement the full recording
5//! pipeline: cpal capture → Whisper transcription → OpenCode injection.
6//!
7//! # Push-to-talk flow
8//!
9//! 1. [`handle_push_to_talk_start`] — opens a [`CpalRecorder`], starts the
10//!    audio stream, spawns an energy-forwarding task, transitions to
11//!    [`RecordingState::Recording`].
12//! 2. [`handle_push_to_talk_stop`] — stops the recorder, checks minimum
13//!    duration, writes a [`TempWav`], transcribes via Whisper, injects into
14//!    OpenCode, transitions back to Idle (or ApprovalPending).
15//!
16use crate::audio::capture::CpalRecorder;
17use crate::audio::wav::TempWav;
18use crate::state::{AppEvent, RecordingState};
19
20use super::VoiceApp;
21
22/// A `Send`-able wrapper around a raw pointer to [`WhisperEngine`].
23///
24/// # Safety
25///
26/// The caller must guarantee that:
27/// 1. The pointed-to `WhisperEngine` outlives all tasks that hold this wrapper.
28/// 2. The engine is never mutated while tasks are running.
29/// 3. No two tasks call `transcribe` concurrently on the same engine
30///    (whisper-rs is not thread-safe for concurrent inference).
31///
32/// In practice the engine is owned by `VoiceApp` which lives for the entire
33/// duration of the program, and we only ever run one transcription at a time.
34struct SendWhisperPtr(*const crate::transcribe::engine::WhisperEngine);
35
36// SAFETY: see doc comment above.
37unsafe impl Send for SendWhisperPtr {}
38
39impl SendWhisperPtr {
40    /// Returns a shared reference to the pointed-to engine.
41    ///
42    /// # Safety
43    ///
44    /// The caller must ensure the pointer is valid and the engine is not
45    /// concurrently mutated.
46    unsafe fn as_ref(&self) -> &crate::transcribe::engine::WhisperEngine {
47        &*self.0
48    }
49}
50
/// Shortest recording, in seconds, that is worth transcribing.  Anything
/// briefer (for example an accidental key tap) is dropped without an error.
const MIN_RECORDING_SECS: f64 = 0.5;

/// Fewest i16 samples required before transcription is attempted.
///
/// Half a second of 16 kHz mono audio: 16 000 / 2 = 8 000 samples.
const MIN_SAMPLES: usize = 16_000 / 2;
58
59// ─── Public entry points ────────────────────────────────────────────────────
60
61/// Handles a toggle event in standard (non-push-to-talk) mode.
62///
63/// Starts recording from [`RecordingState::Idle`] or
64/// [`RecordingState::ApprovalPending`].  Stops recording from
65/// [`RecordingState::Recording`].  Other states are ignored.
66pub(crate) async fn handle_toggle(app: &mut VoiceApp) {
67    match app.state {
68        RecordingState::Idle | RecordingState::ApprovalPending => {
69            app.state = RecordingState::Recording;
70            app.current_level = None;
71            app.render_display();
72        }
73        RecordingState::Recording => {
74            app.state = RecordingState::Transcribing;
75            app.current_level = None;
76            app.render_display();
77        }
78        _ => {
79            // Ignore toggle in other states.
80        }
81    }
82}
83
84
85/// Starts push-to-talk recording (key pressed down).
86///
87/// Opens a [`CpalRecorder`] for the configured audio device, starts the
88/// stream, spawns a task that forwards RMS energy values as
89/// [`AppEvent::AudioChunk`] events, and transitions to
90/// [`RecordingState::Recording`].
91///
92/// Recording is allowed from both [`RecordingState::Idle`] and
93/// [`RecordingState::ApprovalPending`].  In the latter case the user may
94/// be speaking to answer a pending approval, or to inject a new prompt —
95/// the transcription pipeline handles both.
96///
97/// If the recorder cannot be opened (e.g. no microphone), the error is
98/// reported via [`VoiceApp::handle_error`] and the state remains unchanged.
99pub(crate) async fn handle_push_to_talk_start(app: &mut VoiceApp) {
100    if app.state != RecordingState::Idle && app.state != RecordingState::ApprovalPending {
101        return;
102    }
103
104    let device = app.audio_config.device.as_deref();
105
106    // Create and start the recorder.
107    let mut recorder = match CpalRecorder::new(device) {
108        Ok(r) => r,
109        Err(e) => {
110            app.handle_error(&format!("Failed to open audio device: {}", e));
111            return;
112        }
113    };
114
115    let energy_rx = match recorder.start() {
116        Ok(rx) => rx,
117        Err(e) => {
118            app.handle_error(&format!("Failed to start recording: {}", e));
119            return;
120        }
121    };
122
123    // Spawn a task that reads RMS energy from the recorder and forwards it to
124    // the event loop as AudioChunk events so the level meter stays live.
125    let event_tx = app.event_tx.clone();
126    let mut energy_rx = energy_rx;
127    tokio::spawn(async move {
128        while let Some(rms_energy) = energy_rx.recv().await {
129            if event_tx
130                .send(AppEvent::AudioChunk { rms_energy })
131                .is_err()
132            {
133                break; // Event loop has shut down.
134            }
135        }
136    });
137
138    // Store the recorder so handle_push_to_talk_stop can retrieve it.
139    app.recorder = Some(recorder);
140
141    app.state = RecordingState::Recording;
142    app.current_level = None;
143    app.render_display();
144}
145
146/// Stops push-to-talk recording (key released).
147///
148/// Retrieves the active [`CpalRecorder`], stops it, checks the minimum
149/// recording duration, writes a [`TempWav`], transcribes via Whisper, and
150/// injects the result into OpenCode.  Transitions back to Idle (or
151/// [`RecordingState::ApprovalPending`] if there are pending approvals).
152///
153/// Short recordings (< 0.5 s) are silently discarded.
154pub(crate) async fn handle_push_to_talk_stop(app: &mut VoiceApp) {
155    if app.state != RecordingState::Recording {
156        return;
157    }
158
159    // Take the recorder out of the app struct.
160    let mut recorder = match app.recorder.take() {
161        Some(r) => r,
162        None => {
163            // No recorder — just return to idle.
164            return_to_idle_or_approval(app);
165            return;
166        }
167    };
168
169    // Check duration before stopping (stop() clears the start_time).
170    let duration = recorder.duration();
171
172    // Stop the stream and collect samples.
173    let samples = match recorder.stop() {
174        Ok(s) => s,
175        Err(e) => {
176            app.handle_error(&format!("Failed to stop recording: {}", e));
177            return;
178        }
179    };
180
181    // Discard very short recordings.
182    if duration < MIN_RECORDING_SECS || samples.len() < MIN_SAMPLES {
183        app.display.log(&format!(
184            "[voice] Recording too short ({:.2}s, {} samples) — discarded.",
185            duration,
186            samples.len()
187        ));
188        return_to_idle_or_approval(app);
189        return;
190    }
191
192    // Transition to Transcribing while we process.
193    app.state = RecordingState::Transcribing;
194    app.current_level = None;
195    app.render_display();
196
197    // Write samples to a temporary WAV file.
198    // TempWav is RAII: if we return early (error path) before calling
199    // into_path(), the file is automatically deleted on drop.
200    let wav = TempWav::new();
201    if let Err(e) = wav.write(&samples, &app.audio_config) {
202        app.handle_error(&format!("Failed to write WAV file: {}", e));
203        return;
204    }
205
206    // Consume TempWav without deleting the file; we own cleanup from here.
207    let wav_path = wav.into_path();
208
209    // Run Whisper transcription on a blocking thread (CPU-bound).
210    let transcript = match &app.whisper {
211        None => {
212            // No model loaded — clean up and return.
213            let _ = std::fs::remove_file(&wav_path);
214            app.handle_error("Whisper model not loaded. Run 'opencode-voice setup'.");
215            return;
216        }
217        Some(_) => {
218            // Clone the path for the blocking closure.
219            let path_for_task = wav_path.clone();
220
221            // SAFETY: We need to move the WhisperEngine reference into the
222            // blocking task.  We use a raw pointer wrapped in SendWhisperPtr
223            // to work around the borrow checker — this is safe because:
224            //   1. We await the task before returning, so the engine outlives it.
225            //   2. The task does not outlive this function frame.
226            //   3. WhisperEngine is not mutated.
227            let engine_ptr = SendWhisperPtr(
228                app.whisper.as_ref().unwrap() as *const crate::transcribe::engine::WhisperEngine,
229            );
230
231            let result = tokio::task::spawn_blocking(move || {
232                // SAFETY: see SendWhisperPtr safety doc.
233                let engine = unsafe { engine_ptr.as_ref() };
234                engine.transcribe(&path_for_task)
235            })
236            .await;
237
238            // Clean up the WAV file regardless of transcription outcome.
239            let _ = std::fs::remove_file(&wav_path);
240
241            match result {
242                Ok(Ok(r)) => r,
243                Ok(Err(e)) => {
244                    app.handle_error(&format!("Transcription failed: {}", e));
245                    return;
246                }
247                Err(e) => {
248                    app.handle_error(&format!("Transcription task panicked: {}", e));
249                    return;
250                }
251            }
252        }
253    };
254
255    let text = transcript.text.trim().to_string();
256
257    if text.is_empty() {
258        // Nothing transcribed — return to idle without injecting.
259        return_to_idle_or_approval(app);
260        return;
261    }
262
263    // Store the transcript for the idle display.
264    app.last_transcript = Some(text.clone());
265
266    // Check if there is a pending approval that this text might answer.
267    if app.approval_queue.has_pending() {
268        let handled = try_handle_approval(app, &text).await;
269        if handled {
270            return_to_idle_or_approval(app);
271            return;
272        }
273    }
274
275    // Inject the transcribed text into OpenCode.
276    inject_text(app, &text).await;
277}
278
279// ─── Helpers ────────────────────────────────────────────────────────────────
280
281/// Injects `text` into OpenCode and transitions to Injecting → Idle.
282///
283/// Calls `bridge.append_prompt` and, if `auto_submit` is enabled,
284/// `bridge.submit_prompt`.  On error, calls `handle_error`.
285async fn inject_text(app: &mut VoiceApp, text: &str) {
286    app.state = RecordingState::Injecting;
287    app.render_display();
288
289    if let Err(e) = app.bridge.append_prompt(text, None, None).await {
290        app.handle_error(&format!("Failed to inject text: {}", e));
291        return;
292    }
293
294    if app.config.auto_submit {
295        if let Err(e) = app.bridge.submit_prompt().await {
296            app.handle_error(&format!("Failed to submit prompt: {}", e));
297            return;
298        }
299    }
300
301    return_to_idle_or_approval(app);
302}
303
304/// Transitions to [`RecordingState::ApprovalPending`] if there are pending
305/// approvals, otherwise to [`RecordingState::Idle`].  Updates the display.
306pub(crate) fn return_to_idle_or_approval(app: &mut VoiceApp) {
307    if app.approval_queue.has_pending() {
308        app.state = RecordingState::ApprovalPending;
309    } else {
310        app.state = RecordingState::Idle;
311    }
312    app.current_level = None;
313    app.render_display();
314}
315
316/// Attempts to handle `text` as a voice reply to the front-most pending approval.
317///
318/// # Behaviour
319///
320/// 1. Peeks the approval queue.  If empty, returns `false` immediately so the
321///    caller can fall through to normal prompt injection.
322/// 2. **Permission** — calls [`match_permission_command`].  On a match, sends
323///    the reply via [`OpenCodeBridge::reply_permission`], removes the item from
324///    the queue, calls [`refresh_approval_display`], and returns `true`.
325///    On [`MatchResult::NoMatch`] returns `false`.
326/// 3. **Question** — calls [`match_question_answer`].
327///    * [`MatchResult::QuestionAnswer`] → [`OpenCodeBridge::reply_question`] →
328///      remove → refresh → `true`.
329///    * [`MatchResult::QuestionReject`] → [`OpenCodeBridge::reject_question`] →
330///      remove → refresh → `true`.
331///    * [`MatchResult::NoMatch`] → `false`.
332///
333/// Bridge call failures are reported via [`VoiceApp::handle_error`] (non-fatal)
334/// and the function still returns `true` so the text is not re-injected as a
335/// normal prompt.
336pub(crate) async fn try_handle_approval(app: &mut VoiceApp, text: &str) -> bool {
337    use crate::approval::matcher::{match_permission_command, match_question_answer, MatchResult};
338    use crate::approval::types::PendingApproval;
339
340    // Peek at the front of the queue.  Clone what we need so we can release
341    // the borrow on `app` before making async bridge calls.
342    let pending = match app.approval_queue.peek() {
343        Some(p) => p.clone(),
344        None => return false,
345    };
346
347    match &pending {
348        PendingApproval::Permission(_req) => {
349            let result = match_permission_command(text);
350            match result {
351                MatchResult::PermissionReply { reply, message } => {
352                    let id = pending.id().to_string();
353                    let msg_ref = message.as_deref();
354                    if let Err(e) = app.bridge.reply_permission(&id, reply, msg_ref).await {
355                        app.handle_error(&format!("Failed to reply to permission: {}", e));
356                    }
357                    app.approval_queue.remove(&id);
358                    super::approval::refresh_approval_display(app);
359                    true
360                }
361                MatchResult::NoMatch => false,
362                // match_permission_command never returns QuestionAnswer / QuestionReject,
363                // but the compiler requires exhaustive matching.
364                _ => false,
365            }
366        }
367
368        PendingApproval::Question(req) => {
369            // Clone the request so we can pass it to the matcher without
370            // holding a borrow on `app`.
371            let req_clone = req.clone();
372            let result = match_question_answer(text, &req_clone);
373            match result {
374                MatchResult::QuestionAnswer { answers } => {
375                    let id = pending.id().to_string();
376                    if let Err(e) = app.bridge.reply_question(&id, answers).await {
377                        app.handle_error(&format!("Failed to reply to question: {}", e));
378                    }
379                    app.approval_queue.remove(&id);
380                    super::approval::refresh_approval_display(app);
381                    true
382                }
383                MatchResult::QuestionReject => {
384                    let id = pending.id().to_string();
385                    if let Err(e) = app.bridge.reject_question(&id).await {
386                        app.handle_error(&format!("Failed to reject question: {}", e));
387                    }
388                    app.approval_queue.remove(&id);
389                    super::approval::refresh_approval_display(app);
390                    true
391                }
392                MatchResult::NoMatch => false,
393                // match_question_answer never returns PermissionReply.
394                _ => false,
395            }
396        }
397    }
398}
399
400// ─── Tests ───────────────────────────────────────────────────────────────────
401
#[cfg(test)]
mod tests {
    use super::*;
    use crate::app::VoiceApp;
    use crate::approval::types::{PermissionRequest, QuestionInfo, QuestionOption, QuestionRequest};
    use crate::config::{AppConfig, ModelSize};
    use std::path::PathBuf;

    // ── fixtures ─────────────────────────────────────────────────────────────

    /// Configuration pointing at paths that do not exist, so no test depends
    /// on a real model or data directory.
    fn test_config() -> AppConfig {
        AppConfig {
            whisper_model_path: PathBuf::from("/nonexistent/model.bin"),
            opencode_port: 4096,
            toggle_key: ' ',
            model_size: ModelSize::TinyEn,
            auto_submit: true,
            server_password: None,
            data_dir: PathBuf::from("/nonexistent/data"),
            audio_device: None,
            use_global_hotkey: false,
            global_hotkey: "right_option".to_string(),
            push_to_talk: false,
            approval_mode: false,
        }
    }

    /// Fresh app built from [`test_config`].
    fn new_app() -> VoiceApp {
        VoiceApp::new(test_config()).unwrap()
    }

    /// Fresh app forced into `state`.
    fn app_in_state(state: RecordingState) -> VoiceApp {
        let mut app = new_app();
        app.state = state;
        app
    }

    /// A `bash` permission request with the given id and no metadata.
    fn bash_permission(id: &str) -> PermissionRequest {
        PermissionRequest {
            id: id.to_string(),
            permission: "bash".to_string(),
            metadata: serde_json::Value::Null,
        }
    }

    /// A single "Pick one" question offering `labels`, custom answers disabled.
    fn pick_one(id: &str, labels: &[&str]) -> QuestionRequest {
        QuestionRequest {
            id: id.to_string(),
            questions: vec![QuestionInfo {
                question: "Pick one".to_string(),
                options: labels
                    .iter()
                    .map(|label| QuestionOption {
                        label: label.to_string(),
                    })
                    .collect(),
                custom: false, // no custom answers allowed
            }],
        }
    }

    // ── handle_toggle ────────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_handle_toggle_idle_to_recording() {
        let mut app = new_app();
        assert_eq!(app.state, RecordingState::Idle);
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Recording);
    }

    #[tokio::test]
    async fn test_handle_toggle_recording_to_transcribing() {
        let mut app = app_in_state(RecordingState::Recording);
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Transcribing);
    }

    #[tokio::test]
    async fn test_handle_toggle_approval_pending_to_recording() {
        let mut app = app_in_state(RecordingState::ApprovalPending);
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Recording);
    }

    #[tokio::test]
    async fn test_handle_toggle_ignores_transcribing_state() {
        let mut app = app_in_state(RecordingState::Transcribing);
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Transcribing);
    }

    // ── handle_push_to_talk_start / stop ─────────────────────────────────────

    #[tokio::test]
    async fn test_handle_push_to_talk_start_ignores_transcribing() {
        let mut app = app_in_state(RecordingState::Transcribing);
        handle_push_to_talk_start(&mut app).await;
        // Should remain Transcribing — PTT start is only allowed from Idle or ApprovalPending.
        assert_eq!(app.state, RecordingState::Transcribing);
    }

    #[tokio::test]
    async fn test_handle_push_to_talk_start_ignores_recording() {
        let mut app = app_in_state(RecordingState::Recording);
        handle_push_to_talk_start(&mut app).await;
        // Should remain Recording — PTT start is only allowed from Idle or ApprovalPending.
        assert_eq!(app.state, RecordingState::Recording);
    }

    #[tokio::test]
    async fn test_handle_push_to_talk_stop_ignores_idle() {
        let mut app = new_app();
        // Calling stop when not recording should be a no-op.
        handle_push_to_talk_stop(&mut app).await;
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[tokio::test]
    async fn test_handle_push_to_talk_stop_no_recorder_returns_to_idle() {
        // Recording state without a recorder attached.
        let mut app = app_in_state(RecordingState::Recording);
        handle_push_to_talk_stop(&mut app).await;
        // Should return to Idle (no recorder → return_to_idle_or_approval).
        assert_eq!(app.state, RecordingState::Idle);
    }

    // ── return_to_idle_or_approval ───────────────────────────────────────────

    #[test]
    fn test_return_to_idle_when_no_pending() {
        let mut app = app_in_state(RecordingState::Injecting);
        return_to_idle_or_approval(&mut app);
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[test]
    fn test_return_to_approval_pending_when_queue_has_items() {
        let mut app = app_in_state(RecordingState::Injecting);

        // Add a pending approval.
        app.approval_queue.add_permission(bash_permission("p1"));

        return_to_idle_or_approval(&mut app);
        assert_eq!(app.state, RecordingState::ApprovalPending);
    }

    // ── try_handle_approval ──────────────────────────────────────────────────

    /// Returns false when the approval queue is empty (nothing to handle).
    #[tokio::test]
    async fn test_try_handle_approval_empty_queue_returns_false() {
        let mut app = new_app();
        // Queue is empty — any text should return false.
        let result = try_handle_approval(&mut app, "yes").await;
        assert!(!result, "empty queue should return false");
    }

    /// Returns false when the text does not match any permission pattern.
    #[tokio::test]
    async fn test_try_handle_approval_permission_no_match_returns_false() {
        let mut app = new_app();
        app.approval_queue.add_permission(bash_permission("p1"));

        // "hello world" does not match any permission command.
        let result = try_handle_approval(&mut app, "hello world").await;
        assert!(!result, "unrecognised text should return false");
        // Item must still be in the queue.
        assert!(app.approval_queue.has_pending());
    }

    /// Returns false when the text does not match any question option.
    #[tokio::test]
    async fn test_try_handle_approval_question_no_match_returns_false() {
        let mut app = new_app();
        app.approval_queue
            .add_question(pick_one("q1", &["Alpha", "Beta"]));

        // "gamma" is not an option and custom is disabled.
        let result = try_handle_approval(&mut app, "gamma").await;
        assert!(!result, "unrecognised question answer should return false");
        assert!(app.approval_queue.has_pending());
    }

    /// A matching permission command removes the item from the queue and
    /// returns true.  The bridge call will fail (no server running) but the
    /// function should still return true and remove the item.
    #[tokio::test]
    async fn test_try_handle_approval_permission_match_removes_item_and_returns_true() {
        let mut app = new_app();
        app.approval_queue.add_permission(bash_permission("p1"));
        app.state = RecordingState::ApprovalPending;

        // "yes" matches the Once permission pattern.
        // The bridge call will fail (no server), but the item is still removed.
        let result = try_handle_approval(&mut app, "yes").await;
        assert!(result, "matched permission should return true");
        assert!(
            !app.approval_queue.has_pending(),
            "item should be removed from queue after match"
        );
    }

    /// A matching question answer removes the item from the queue and returns
    /// true.
    #[tokio::test]
    async fn test_try_handle_approval_question_match_removes_item_and_returns_true() {
        let mut app = new_app();
        app.approval_queue
            .add_question(pick_one("q1", &["Alpha", "Beta"]));
        app.state = RecordingState::ApprovalPending;

        // "alpha" matches the first option exactly.
        let result = try_handle_approval(&mut app, "alpha").await;
        assert!(result, "matched question answer should return true");
        assert!(
            !app.approval_queue.has_pending(),
            "item should be removed from queue after match"
        );
    }

    /// A question rejection phrase removes the item from the queue and returns
    /// true.
    #[tokio::test]
    async fn test_try_handle_approval_question_reject_removes_item_and_returns_true() {
        let mut app = new_app();
        app.approval_queue.add_question(pick_one("q2", &["Yes"]));
        app.state = RecordingState::ApprovalPending;

        // "skip" is a question rejection phrase.
        let result = try_handle_approval(&mut app, "skip").await;
        assert!(result, "question rejection should return true");
        assert!(
            !app.approval_queue.has_pending(),
            "item should be removed from queue after rejection"
        );
    }
}