Skip to main content

opencode_voice/app/
recording.rs

//! Recording session management — async input event handlers for the recording state machine.
//!
//! These functions are called from the main event loop in [`super`] when
//! keyboard or hotkey input events arrive.  They implement the full recording
//! pipeline: cpal capture → Whisper transcription → OpenCode injection.
//!
//! # Push-to-talk flow
//!
//! 1. [`handle_push_to_talk_start`] — opens a [`CpalRecorder`], starts the
//!    audio stream, spawns an energy-forwarding task, transitions to
//!    [`RecordingState::Recording`].
//! 2. [`handle_push_to_talk_stop`] — stops the recorder, checks minimum
//!    duration, writes a [`TempWav`], transcribes via Whisper, injects into
//!    OpenCode, transitions back to Idle (or ApprovalPending).
//!
16use crate::audio::capture::CpalRecorder;
17use crate::audio::wav::TempWav;
18use crate::state::{AppEvent, RecordingState};
19
20use super::VoiceApp;
21
22/// A `Send`-able wrapper around a raw pointer to [`WhisperEngine`].
23///
24/// # Safety
25///
26/// The caller must guarantee that:
27/// 1. The pointed-to `WhisperEngine` outlives all tasks that hold this wrapper.
28/// 2. The engine is never mutated while tasks are running.
29/// 3. No two tasks call `transcribe` concurrently on the same engine
30///    (whisper-rs is not thread-safe for concurrent inference).
31///
32/// In practice the engine is owned by `VoiceApp` which lives for the entire
33/// duration of the program, and we only ever run one transcription at a time.
34struct SendWhisperPtr(*const crate::transcribe::engine::WhisperEngine);
35
36// SAFETY: see doc comment above.
37unsafe impl Send for SendWhisperPtr {}
38
39impl SendWhisperPtr {
40    /// Returns a shared reference to the pointed-to engine.
41    ///
42    /// # Safety
43    ///
44    /// The caller must ensure the pointer is valid and the engine is not
45    /// concurrently mutated.
46    unsafe fn as_ref(&self) -> &crate::transcribe::engine::WhisperEngine {
47        &*self.0
48    }
49}
50
/// Shortest recording, in seconds, that is worth transcribing.  Anything
/// shorter (e.g. an accidental key tap) is discarded.
const MIN_RECORDING_SECS: f64 = 0.5;

/// Fewest `i16` samples needed before transcription is attempted.
/// At 16 kHz mono, half a second of audio is 16 000 / 2 = 8 000 samples.
const MIN_SAMPLES: usize = 16_000 / 2;
58
59// ─── Public entry points ────────────────────────────────────────────────────
60
61/// Handles a toggle event in standard (non-push-to-talk) mode.
62///
63/// If currently Idle, starts recording.  If currently Recording, stops and
64/// transcribes.  Other states are ignored.
65pub(crate) async fn handle_toggle(app: &mut VoiceApp) {
66    match app.state {
67        RecordingState::Idle => {
68            app.state = RecordingState::Recording;
69            app.current_level = None;
70            app.render_display();
71        }
72        RecordingState::Recording => {
73            app.state = RecordingState::Transcribing;
74            app.current_level = None;
75            app.render_display();
76        }
77        _ => {
78            // Ignore toggle in other states.
79        }
80    }
81}
82
83
84/// Starts push-to-talk recording (key pressed down).
85///
86/// Opens a [`CpalRecorder`] for the configured audio device, starts the
87/// stream, spawns a task that forwards RMS energy values as
88/// [`AppEvent::AudioChunk`] events, and transitions to
89/// [`RecordingState::Recording`].
90///
91/// If the recorder cannot be opened (e.g. no microphone), the error is
92/// reported via [`VoiceApp::handle_error`] and the state remains unchanged.
93pub(crate) async fn handle_push_to_talk_start(app: &mut VoiceApp) {
94    if app.state != RecordingState::Idle {
95        return;
96    }
97
98    let device = app.audio_config.device.as_deref();
99
100    // Create and start the recorder.
101    let mut recorder = match CpalRecorder::new(device) {
102        Ok(r) => r,
103        Err(e) => {
104            app.handle_error(&format!("Failed to open audio device: {}", e));
105            return;
106        }
107    };
108
109    let energy_rx = match recorder.start() {
110        Ok(rx) => rx,
111        Err(e) => {
112            app.handle_error(&format!("Failed to start recording: {}", e));
113            return;
114        }
115    };
116
117    // Spawn a task that reads RMS energy from the recorder and forwards it to
118    // the event loop as AudioChunk events so the level meter stays live.
119    let event_tx = app.event_tx.clone();
120    let mut energy_rx = energy_rx;
121    tokio::spawn(async move {
122        while let Some(rms_energy) = energy_rx.recv().await {
123            if event_tx
124                .send(AppEvent::AudioChunk { rms_energy })
125                .is_err()
126            {
127                break; // Event loop has shut down.
128            }
129        }
130    });
131
132    // Store the recorder so handle_push_to_talk_stop can retrieve it.
133    app.recorder = Some(recorder);
134
135    app.state = RecordingState::Recording;
136    app.current_level = None;
137    app.render_display();
138}
139
140/// Stops push-to-talk recording (key released).
141///
142/// Retrieves the active [`CpalRecorder`], stops it, checks the minimum
143/// recording duration, writes a [`TempWav`], transcribes via Whisper, and
144/// injects the result into OpenCode.  Transitions back to Idle (or
145/// [`RecordingState::ApprovalPending`] if there are pending approvals).
146///
147/// Short recordings (< 0.5 s) are silently discarded.
148pub(crate) async fn handle_push_to_talk_stop(app: &mut VoiceApp) {
149    if app.state != RecordingState::Recording {
150        return;
151    }
152
153    // Take the recorder out of the app struct.
154    let mut recorder = match app.recorder.take() {
155        Some(r) => r,
156        None => {
157            // No recorder — just return to idle.
158            return_to_idle_or_approval(app);
159            return;
160        }
161    };
162
163    // Check duration before stopping (stop() clears the start_time).
164    let duration = recorder.duration();
165
166    // Stop the stream and collect samples.
167    let samples = match recorder.stop() {
168        Ok(s) => s,
169        Err(e) => {
170            app.handle_error(&format!("Failed to stop recording: {}", e));
171            return;
172        }
173    };
174
175    // Discard very short recordings.
176    if duration < MIN_RECORDING_SECS || samples.len() < MIN_SAMPLES {
177        app.display.log(&format!(
178            "[voice] Recording too short ({:.2}s, {} samples) — discarded.",
179            duration,
180            samples.len()
181        ));
182        return_to_idle_or_approval(app);
183        return;
184    }
185
186    // Transition to Transcribing while we process.
187    app.state = RecordingState::Transcribing;
188    app.current_level = None;
189    app.render_display();
190
191    // Write samples to a temporary WAV file.
192    // TempWav is RAII: if we return early (error path) before calling
193    // into_path(), the file is automatically deleted on drop.
194    let wav = TempWav::new();
195    if let Err(e) = wav.write(&samples, &app.audio_config) {
196        app.handle_error(&format!("Failed to write WAV file: {}", e));
197        return;
198    }
199
200    // Consume TempWav without deleting the file; we own cleanup from here.
201    let wav_path = wav.into_path();
202
203    // Run Whisper transcription on a blocking thread (CPU-bound).
204    let transcript = match &app.whisper {
205        None => {
206            // No model loaded — clean up and return.
207            let _ = std::fs::remove_file(&wav_path);
208            app.handle_error("Whisper model not loaded. Run 'opencode-voice setup'.");
209            return;
210        }
211        Some(_) => {
212            // Clone the path for the blocking closure.
213            let path_for_task = wav_path.clone();
214
215            // SAFETY: We need to move the WhisperEngine reference into the
216            // blocking task.  We use a raw pointer wrapped in SendWhisperPtr
217            // to work around the borrow checker — this is safe because:
218            //   1. We await the task before returning, so the engine outlives it.
219            //   2. The task does not outlive this function frame.
220            //   3. WhisperEngine is not mutated.
221            let engine_ptr = SendWhisperPtr(
222                app.whisper.as_ref().unwrap() as *const crate::transcribe::engine::WhisperEngine,
223            );
224
225            let result = tokio::task::spawn_blocking(move || {
226                // SAFETY: see SendWhisperPtr safety doc.
227                let engine = unsafe { engine_ptr.as_ref() };
228                engine.transcribe(&path_for_task)
229            })
230            .await;
231
232            // Clean up the WAV file regardless of transcription outcome.
233            let _ = std::fs::remove_file(&wav_path);
234
235            match result {
236                Ok(Ok(r)) => r,
237                Ok(Err(e)) => {
238                    app.handle_error(&format!("Transcription failed: {}", e));
239                    return;
240                }
241                Err(e) => {
242                    app.handle_error(&format!("Transcription task panicked: {}", e));
243                    return;
244                }
245            }
246        }
247    };
248
249    let text = transcript.text.trim().to_string();
250
251    if text.is_empty() {
252        // Nothing transcribed — return to idle without injecting.
253        return_to_idle_or_approval(app);
254        return;
255    }
256
257    // Store the transcript for the idle display.
258    app.last_transcript = Some(text.clone());
259
260    // Check if there is a pending approval that this text might answer.
261    if app.approval_queue.has_pending() {
262        let handled = try_handle_approval(app, &text).await;
263        if handled {
264            return_to_idle_or_approval(app);
265            return;
266        }
267    }
268
269    // Inject the transcribed text into OpenCode.
270    inject_text(app, &text).await;
271}
272
273// ─── Helpers ────────────────────────────────────────────────────────────────
274
275/// Injects `text` into OpenCode and transitions to Injecting → Idle.
276///
277/// Calls `bridge.append_prompt` and, if `auto_submit` is enabled,
278/// `bridge.submit_prompt`.  On error, calls `handle_error`.
279async fn inject_text(app: &mut VoiceApp, text: &str) {
280    app.state = RecordingState::Injecting;
281    app.render_display();
282
283    if let Err(e) = app.bridge.append_prompt(text, None, None).await {
284        app.handle_error(&format!("Failed to inject text: {}", e));
285        return;
286    }
287
288    if app.config.auto_submit {
289        if let Err(e) = app.bridge.submit_prompt().await {
290            app.handle_error(&format!("Failed to submit prompt: {}", e));
291            return;
292        }
293    }
294
295    return_to_idle_or_approval(app);
296}
297
298/// Transitions to [`RecordingState::ApprovalPending`] if there are pending
299/// approvals, otherwise to [`RecordingState::Idle`].  Updates the display.
300pub(crate) fn return_to_idle_or_approval(app: &mut VoiceApp) {
301    if app.approval_queue.has_pending() {
302        app.state = RecordingState::ApprovalPending;
303    } else {
304        app.state = RecordingState::Idle;
305    }
306    app.current_level = None;
307    app.render_display();
308}
309
310/// Attempts to handle `text` as a voice reply to the front-most pending approval.
311///
312/// # Behaviour
313///
314/// 1. Peeks the approval queue.  If empty, returns `false` immediately so the
315///    caller can fall through to normal prompt injection.
316/// 2. **Permission** — calls [`match_permission_command`].  On a match, sends
317///    the reply via [`OpenCodeBridge::reply_permission`], removes the item from
318///    the queue, calls [`refresh_approval_display`], and returns `true`.
319///    On [`MatchResult::NoMatch`] returns `false`.
320/// 3. **Question** — calls [`match_question_answer`].
321///    * [`MatchResult::QuestionAnswer`] → [`OpenCodeBridge::reply_question`] →
322///      remove → refresh → `true`.
323///    * [`MatchResult::QuestionReject`] → [`OpenCodeBridge::reject_question`] →
324///      remove → refresh → `true`.
325///    * [`MatchResult::NoMatch`] → `false`.
326///
327/// Bridge call failures are reported via [`VoiceApp::handle_error`] (non-fatal)
328/// and the function still returns `true` so the text is not re-injected as a
329/// normal prompt.
330pub(crate) async fn try_handle_approval(app: &mut VoiceApp, text: &str) -> bool {
331    use crate::approval::matcher::{match_permission_command, match_question_answer, MatchResult};
332    use crate::approval::types::PendingApproval;
333
334    // Peek at the front of the queue.  Clone what we need so we can release
335    // the borrow on `app` before making async bridge calls.
336    let pending = match app.approval_queue.peek() {
337        Some(p) => p.clone(),
338        None => return false,
339    };
340
341    match &pending {
342        PendingApproval::Permission(_req) => {
343            let result = match_permission_command(text);
344            match result {
345                MatchResult::PermissionReply { reply, message } => {
346                    let id = pending.id().to_string();
347                    let msg_ref = message.as_deref();
348                    if let Err(e) = app.bridge.reply_permission(&id, reply, msg_ref).await {
349                        app.handle_error(&format!("Failed to reply to permission: {}", e));
350                    }
351                    app.approval_queue.remove(&id);
352                    super::approval::refresh_approval_display(app);
353                    true
354                }
355                MatchResult::NoMatch => false,
356                // match_permission_command never returns QuestionAnswer / QuestionReject,
357                // but the compiler requires exhaustive matching.
358                _ => false,
359            }
360        }
361
362        PendingApproval::Question(req) => {
363            // Clone the request so we can pass it to the matcher without
364            // holding a borrow on `app`.
365            let req_clone = req.clone();
366            let result = match_question_answer(text, &req_clone);
367            match result {
368                MatchResult::QuestionAnswer { answers } => {
369                    let id = pending.id().to_string();
370                    if let Err(e) = app.bridge.reply_question(&id, answers).await {
371                        app.handle_error(&format!("Failed to reply to question: {}", e));
372                    }
373                    app.approval_queue.remove(&id);
374                    super::approval::refresh_approval_display(app);
375                    true
376                }
377                MatchResult::QuestionReject => {
378                    let id = pending.id().to_string();
379                    if let Err(e) = app.bridge.reject_question(&id).await {
380                        app.handle_error(&format!("Failed to reject question: {}", e));
381                    }
382                    app.approval_queue.remove(&id);
383                    super::approval::refresh_approval_display(app);
384                    true
385                }
386                MatchResult::NoMatch => false,
387                // match_question_answer never returns PermissionReply.
388                _ => false,
389            }
390        }
391    }
392}
393
394// ─── Tests ───────────────────────────────────────────────────────────────────
395
#[cfg(test)]
mod tests {
    use super::*;
    use crate::app::VoiceApp;
    use crate::approval::types::{
        PermissionRequest, QuestionInfo, QuestionOption, QuestionRequest,
    };
    use crate::config::{AppConfig, ModelSize};
    use std::path::PathBuf;

    // ── fixtures ─────────────────────────────────────────────────────────────

    /// Configuration pointing at non-existent paths; no model is loaded.
    fn test_config() -> AppConfig {
        AppConfig {
            whisper_model_path: PathBuf::from("/nonexistent/model.bin"),
            opencode_port: 4096,
            toggle_key: ' ',
            model_size: ModelSize::TinyEn,
            auto_submit: true,
            server_password: None,
            data_dir: PathBuf::from("/nonexistent/data"),
            audio_device: None,
            use_global_hotkey: false,
            global_hotkey: "right_option".to_string(),
            push_to_talk: false,
            approval_mode: false,
        }
    }

    /// Builds a fresh app from [`test_config`].
    fn new_app() -> VoiceApp {
        VoiceApp::new(test_config()).unwrap()
    }

    /// A "bash" permission request with the given id and null metadata.
    fn bash_permission(id: &str) -> PermissionRequest {
        PermissionRequest {
            id: id.to_string(),
            permission: "bash".to_string(),
            metadata: serde_json::Value::Null,
        }
    }

    /// A single "Pick one" question with the given option labels and custom
    /// answers disabled.
    fn pick_one_question(id: &str, labels: &[&str]) -> QuestionRequest {
        QuestionRequest {
            id: id.to_string(),
            questions: vec![QuestionInfo {
                question: "Pick one".to_string(),
                options: labels
                    .iter()
                    .map(|label| QuestionOption {
                        label: (*label).to_string(),
                    })
                    .collect(),
                custom: false, // no custom answers allowed
            }],
        }
    }

    // ── handle_toggle ────────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_handle_toggle_idle_to_recording() {
        let mut app = new_app();
        assert_eq!(app.state, RecordingState::Idle);
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Recording);
    }

    #[tokio::test]
    async fn test_handle_toggle_recording_to_transcribing() {
        let mut app = new_app();
        app.state = RecordingState::Recording;
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Transcribing);
    }

    #[tokio::test]
    async fn test_handle_toggle_ignores_transcribing_state() {
        let mut app = new_app();
        app.state = RecordingState::Transcribing;
        handle_toggle(&mut app).await;
        assert_eq!(app.state, RecordingState::Transcribing);
    }

    // ── handle_push_to_talk_start / stop ─────────────────────────────────────

    #[tokio::test]
    async fn test_handle_push_to_talk_stop_ignores_idle() {
        let mut app = new_app();
        // Calling stop when not recording should be a no-op.
        handle_push_to_talk_stop(&mut app).await;
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[tokio::test]
    async fn test_handle_push_to_talk_stop_no_recorder_returns_to_idle() {
        let mut app = new_app();
        // Manually set Recording state without a recorder.
        app.state = RecordingState::Recording;
        handle_push_to_talk_stop(&mut app).await;
        // Should return to Idle (no recorder → return_to_idle_or_approval).
        assert_eq!(app.state, RecordingState::Idle);
    }

    // ── return_to_idle_or_approval ───────────────────────────────────────────

    #[test]
    fn test_return_to_idle_when_no_pending() {
        let mut app = new_app();
        app.state = RecordingState::Injecting;
        return_to_idle_or_approval(&mut app);
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[test]
    fn test_return_to_approval_pending_when_queue_has_items() {
        let mut app = new_app();
        app.state = RecordingState::Injecting;

        // Add a pending approval.
        app.approval_queue.add_permission(bash_permission("p1"));

        return_to_idle_or_approval(&mut app);
        assert_eq!(app.state, RecordingState::ApprovalPending);
    }

    // ── try_handle_approval ──────────────────────────────────────────────────

    /// Returns false when the approval queue is empty (nothing to handle).
    #[tokio::test]
    async fn test_try_handle_approval_empty_queue_returns_false() {
        let mut app = new_app();
        // Queue is empty — any text should return false.
        let result = try_handle_approval(&mut app, "yes").await;
        assert!(!result, "empty queue should return false");
    }

    /// Returns false when the text does not match any permission pattern.
    #[tokio::test]
    async fn test_try_handle_approval_permission_no_match_returns_false() {
        let mut app = new_app();
        app.approval_queue.add_permission(bash_permission("p1"));

        // "hello world" does not match any permission command.
        let result = try_handle_approval(&mut app, "hello world").await;
        assert!(!result, "unrecognised text should return false");
        // Item must still be in the queue.
        assert!(app.approval_queue.has_pending());
    }

    /// Returns false when the text does not match any question option.
    #[tokio::test]
    async fn test_try_handle_approval_question_no_match_returns_false() {
        let mut app = new_app();
        app.approval_queue
            .add_question(pick_one_question("q1", &["Alpha", "Beta"]));

        // "gamma" is not an option and custom is disabled.
        let result = try_handle_approval(&mut app, "gamma").await;
        assert!(!result, "unrecognised question answer should return false");
        assert!(app.approval_queue.has_pending());
    }

    /// A matching permission command removes the item from the queue and
    /// returns true.  The bridge call will fail (no server running) but the
    /// function should still return true and remove the item.
    #[tokio::test]
    async fn test_try_handle_approval_permission_match_removes_item_and_returns_true() {
        let mut app = new_app();
        app.approval_queue.add_permission(bash_permission("p1"));
        app.state = RecordingState::ApprovalPending;

        // "yes" matches the Once permission pattern.
        // The bridge call will fail (no server), but the item is still removed.
        let result = try_handle_approval(&mut app, "yes").await;
        assert!(result, "matched permission should return true");
        assert!(
            !app.approval_queue.has_pending(),
            "item should be removed from queue after match"
        );
    }

    /// A matching question answer removes the item from the queue and returns
    /// true.
    #[tokio::test]
    async fn test_try_handle_approval_question_match_removes_item_and_returns_true() {
        let mut app = new_app();
        app.approval_queue
            .add_question(pick_one_question("q1", &["Alpha", "Beta"]));
        app.state = RecordingState::ApprovalPending;

        // "alpha" matches the first option exactly.
        let result = try_handle_approval(&mut app, "alpha").await;
        assert!(result, "matched question answer should return true");
        assert!(
            !app.approval_queue.has_pending(),
            "item should be removed from queue after match"
        );
    }

    /// A question rejection phrase removes the item from the queue and returns
    /// true.
    #[tokio::test]
    async fn test_try_handle_approval_question_reject_removes_item_and_returns_true() {
        let mut app = new_app();
        app.approval_queue
            .add_question(pick_one_question("q2", &["Yes"]));
        app.state = RecordingState::ApprovalPending;

        // "skip" is a question rejection phrase.
        let result = try_handle_approval(&mut app, "skip").await;
        assert!(result, "question rejection should return true");
        assert!(
            !app.approval_queue.has_pending(),
            "item should be removed from queue after rejection"
        );
    }
}