Skip to main content

opencode_voice/app/
mod.rs

1//! Main application orchestrator for OpenCode Voice Mode.
2//!
3//! This module owns the [`VoiceApp`] struct, which wires together all subsystems:
4//! audio capture, transcription, keyboard/hotkey input, SSE event streaming,
5//! approval queue, and the terminal display.
6
7pub mod recording;
8pub mod approval;
9
10use anyhow::Result;
11use tokio_util::sync::CancellationToken;
12
13use crate::audio::capture::CpalRecorder;
14use crate::audio::{default_audio_config, AudioConfig};
15use crate::bridge::client::OpenCodeBridge;
16use crate::bridge::events::{OpenCodeEvents, SseEvent};
17use crate::approval::queue::ApprovalQueue;
18use crate::config::AppConfig;
19use crate::input::hotkey::GlobalHotkey;
20use crate::input::keyboard::{is_tty, KeyboardInput};
21use crate::state::{AppEvent, InputEvent, RecordingState};
22use crate::transcribe::engine::WhisperEngine;
23use crate::transcribe::setup::is_whisper_ready;
24use crate::ui::display::{Display, DisplayMeta};
25
/// Produces the label shown in the UI for the configured toggle key.
///
/// A space character would render as an invisible blank, so it is spelled
/// out as `"Space"`; every other character is returned unchanged as a
/// one-character string.
fn format_toggle_key(ch: char) -> String {
    if ch == ' ' {
        String::from("Space")
    } else {
        ch.to_string()
    }
}
34
/// The central application struct that owns all subsystem state.
///
/// Fields are `pub(crate)` so that `recording` and `approval` submodules can
/// access them directly without going through getter methods. Fields without
/// a visibility modifier are only touched from this module.
pub struct VoiceApp {
    /// Resolved application configuration.
    ///
    /// Not strictly read-only: `start` clears `push_to_talk` when the global
    /// hotkey cannot be set up.
    pub(crate) config: AppConfig,

    /// Current recording state machine state (starts as `Idle`).
    pub(crate) state: RecordingState,

    /// Terminal display renderer.
    pub(crate) display: Display,

    /// HTTP client for the OpenCode API.
    pub(crate) bridge: OpenCodeBridge,

    /// Loaded Whisper transcription engine.
    ///
    /// `None` if the model is not ready or if loading it failed in `new`.
    pub(crate) whisper: Option<WhisperEngine>,

    /// FIFO queue for pending permission/question approvals.
    pub(crate) approval_queue: ApprovalQueue,

    /// Active cpal recorder during push-to-talk recording (None when idle).
    ///
    /// Stored here so that `handle_push_to_talk_start` and
    /// `handle_push_to_talk_stop` can share ownership across two separate
    /// async calls without moving the recorder across threads.
    /// `render_display` also reads the live recording duration from it.
    pub(crate) recorder: Option<CpalRecorder>,

    /// Audio configuration: the defaults from `default_audio_config()` with
    /// the device overridden by `config.audio_device`.
    pub(crate) audio_config: AudioConfig,

    /// Main event channel sender — cloned and given to input/SSE tasks.
    pub(crate) event_tx: tokio::sync::mpsc::UnboundedSender<AppEvent>,

    /// Main event channel receiver — consumed by the event loop.
    event_rx: tokio::sync::mpsc::UnboundedReceiver<AppEvent>,

    /// Cancellation token broadcast to all background tasks.
    pub(crate) cancel: CancellationToken,

    /// Guard against double-shutdown; set by `shutdown` and checked by the
    /// event loop after each input event.
    is_shutting_down: bool,

    /// Last transcription text shown in the idle display.
    pub(crate) last_transcript: Option<String>,

    /// Most recent raw RMS energy reported by an `AudioChunk` event.
    ///
    /// This is NOT yet normalised: `render_display` converts it to a
    /// 0.0–1.0 dB-scaled value for the level meter.
    pub(crate) current_level: Option<f32>,

    /// Current error message for the error display; set by `handle_error`
    /// and cleared when the app recovers from the `Error` state.
    pub(crate) current_error: Option<String>,

    /// Spinner frame counter, advanced on every `Tick` while the state is
    /// `Recording` or `Transcribing`.
    spinner_frame: usize,

    /// Suppresses the next Toggle event after a KeyUp. The evdev listener
    /// sends KeyUp+Toggle on key release; without this flag the Toggle
    /// would re-start recording after KeyUp completed the full pipeline.
    suppress_next_toggle: bool,

    /// Counter for throttling debug audio level output (every 10th
    /// `AudioChunk` is logged in debug mode).
    debug_audio_counter: usize,
}
100
101impl VoiceApp {
102    /// Prints a debug log line with `\r\n` (works correctly in raw mode).
103    pub(crate) fn debug_log(&self, msg: std::fmt::Arguments) {
104        if self.config.debug {
105            eprint!("[debug] {}\r\n", msg);
106        }
107    }
108
109    /// Creates a new `VoiceApp` from the given configuration.
110    ///
111    /// Loads the Whisper engine synchronously (blocking) before entering the
112    /// async context.  If the model is not yet downloaded the engine is set to
113    /// `None` and a warning is printed; the app will still start but
114    /// transcription will be unavailable until the model is downloaded.
115    pub fn new(config: AppConfig) -> Result<Self> {
116        // Load WhisperEngine synchronously (blocking) before entering async.
117        // This is intentional: whisper-rs model loading is CPU-bound and must
118        // not block the tokio runtime.
119        let whisper = if is_whisper_ready(&config.data_dir, &config.model_size) {
120            match WhisperEngine::new(&config.whisper_model_path, config.model_size.is_multilingual()) {
121                Ok(engine) => {
122                    Some(engine)
123                }
124                Err(e) => {
125                    eprintln!("[voice] Warning: failed to load Whisper model: {}", e);
126                    None
127                }
128            }
129        } else {
130            None
131        };
132
133        let bridge = OpenCodeBridge::new(
134            "http://localhost",
135            config.opencode_port,
136            config.server_password.clone(),
137        );
138
139        let audio_config = AudioConfig {
140            device: config.audio_device.clone(),
141            ..default_audio_config()
142        };
143
144        let (event_tx, event_rx) = tokio::sync::mpsc::unbounded_channel::<AppEvent>();
145
146        Ok(VoiceApp {
147            config,
148            state: RecordingState::Idle,
149            display: Display::new(),
150            bridge,
151            whisper,
152            approval_queue: ApprovalQueue::new(),
153            audio_config,
154            recorder: None,
155            event_tx,
156            event_rx,
157            cancel: CancellationToken::new(),
158            is_shutting_down: false,
159            last_transcript: None,
160            current_level: None,
161            current_error: None,
162            spinner_frame: 0,
163            suppress_next_toggle: false,
164            debug_audio_counter: 0,
165        })
166    }
167
168    /// Starts the application: spawns background tasks and enters the event loop.
169    ///
170    /// This method does not return until the application shuts down.
171    pub async fn start(&mut self) -> Result<()> {
172        // Warn if Whisper model is not ready.
173        if self.whisper.is_none() {
174            eprintln!(
175                "[voice] Warning: Whisper model not found. Run 'opencode-voice setup' to download it."
176            );
177        }
178
179        // Warn (non-fatal) if OpenCode is not reachable. Skip in debug mode.
180        if !self.config.debug && !self.bridge.is_connected().await {
181            eprintln!(
182                "[voice] Warning: Cannot connect to OpenCode at port {}. \
183                 Make sure OpenCode is running with --port {}.",
184                self.config.opencode_port, self.config.opencode_port
185            );
186        }
187
188        // Set up global hotkey before the welcome banner so the banner
189        // reflects any fallback from PTT to toggle mode.
190        if self.config.use_global_hotkey {
191            let hotkey_name = self.config.global_hotkey.clone();
192            let cancel = self.cancel.clone();
193
194            let (hotkey_tx, mut hotkey_rx) =
195                tokio::sync::mpsc::unbounded_channel::<InputEvent>();
196
197            let event_tx_fwd = self.event_tx.clone();
198            tokio::spawn(async move {
199                while let Some(ev) = hotkey_rx.recv().await {
200                    let _ = event_tx_fwd.send(AppEvent::Input(ev));
201                }
202            });
203
204            match GlobalHotkey::new(&hotkey_name, hotkey_tx, cancel) {
205                Ok(hotkey) => {
206                    std::thread::spawn(move || {
207                        if let Err(e) = hotkey.run() {
208                            eprintln!("[voice] Global hotkey error: {}", e);
209                        }
210                    });
211                }
212                Err(e) => {
213                    eprintln!("[voice] Warning: Could not set up global hotkey: {}", e);
214                    if self.config.push_to_talk {
215                        eprintln!("[voice] Falling back to toggle mode (press space to start/stop recording).");
216                        self.config.push_to_talk = false;
217                    }
218                }
219            }
220        }
221
222        // Show welcome banner BEFORE spawning the keyboard thread.
223        // The keyboard thread enables raw mode, which breaks println!
224        // (\n no longer includes \r, causing lines to shift right).
225        if self.config.debug {
226            self.debug_log(format_args!("mode: {}", if self.config.push_to_talk { "push-to-talk" } else { "toggle" }));
227            self.debug_log(format_args!("hotkey: {} ({})", self.config.global_hotkey,
228                if self.config.use_global_hotkey { "enabled" } else { "disabled" }));
229            self.debug_log(format_args!("toggle key: '{}'", self.config.toggle_key));
230            self.debug_log(format_args!("device: {}", self.config.audio_device.as_deref().unwrap_or("(default)")));
231            self.debug_log(format_args!("model: {} ({})", self.config.model_size,
232                if self.whisper.is_some() { "loaded" } else { "not loaded" }));
233            self.debug_log(format_args!("ready"));
234        } else {
235            self.display.show_welcome(
236                &format_toggle_key(self.config.toggle_key),
237                self.config.use_global_hotkey,
238                &self.config.global_hotkey,
239                self.config.push_to_talk,
240            );
241        }
242
243        // Spawn keyboard input on a dedicated OS thread (crossterm poll loop is blocking).
244        if is_tty() {
245            let kb_sender = self.event_tx.clone();
246            let kb_cancel = self.cancel.clone();
247            let toggle_key = self.config.toggle_key;
248
249            let (input_tx, mut input_rx) =
250                tokio::sync::mpsc::unbounded_channel::<InputEvent>();
251
252            let event_tx_fwd = self.event_tx.clone();
253            tokio::spawn(async move {
254                while let Some(ev) = input_rx.recv().await {
255                    let _ = event_tx_fwd.send(AppEvent::Input(ev));
256                }
257            });
258
259            std::thread::spawn(move || {
260                let kb = KeyboardInput::new(toggle_key, input_tx, kb_cancel);
261                if let Err(e) = kb.run() {
262                    eprintln!("[voice] Keyboard input error: {}", e);
263                }
264                let _ = kb_sender.send(AppEvent::Input(InputEvent::Quit));
265            });
266        }
267
268        // Spawn SSE event bridge for permission/question handling.
269        if self.config.handle_prompts && !self.config.debug {
270            let (sse_tx, mut sse_rx) =
271                tokio::sync::mpsc::unbounded_channel::<SseEvent>();
272
273            let sse_client = OpenCodeEvents::new(
274                self.bridge.get_base_url().to_string(),
275                self.config.server_password.clone(),
276                sse_tx,
277            );
278            sse_client.start(self.cancel.clone());
279
280            // Forward SseEvent → AppEvent on a tokio task.
281            let event_tx_fwd = self.event_tx.clone();
282            tokio::spawn(async move {
283                while let Some(sse_event) = sse_rx.recv().await {
284                    let app_event = match sse_event {
285                        SseEvent::Connected => AppEvent::SseConnected,
286                        SseEvent::Disconnected(reason) => AppEvent::SseDisconnected(reason),
287                        SseEvent::PermissionAsked(req) => AppEvent::PermissionAsked(req),
288                        SseEvent::PermissionReplied {
289                            session_id,
290                            request_id,
291                            reply,
292                        } => AppEvent::PermissionReplied {
293                            session_id,
294                            request_id,
295                            reply,
296                        },
297                        SseEvent::QuestionAsked(req) => AppEvent::QuestionAsked(req),
298                        SseEvent::QuestionReplied {
299                            session_id,
300                            request_id,
301                            answers,
302                        } => AppEvent::QuestionReplied {
303                            session_id,
304                            request_id,
305                            answers,
306                        },
307                        SseEvent::QuestionRejected {
308                            session_id,
309                            request_id,
310                        } => AppEvent::QuestionRejected {
311                            session_id,
312                            request_id,
313                        },
314                        SseEvent::SessionStatus { session_id, busy } => {
315                            AppEvent::SessionStatus { session_id, busy }
316                        }
317                    };
318                    if event_tx_fwd.send(app_event).is_err() {
319                        break;
320                    }
321                }
322            });
323        }
324
325        // Register SIGINT / SIGTERM signal handlers.
326        self.register_signal_handlers();
327
328        // Spawn a 10 Hz UI tick for animations (recording timer, spinner).
329        // Skip in debug mode — no animated display.
330        if !self.config.debug {
331            let tick_tx = self.event_tx.clone();
332            let tick_cancel = self.cancel.clone();
333            tokio::spawn(async move {
334                let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(100));
335                loop {
336                    interval.tick().await;
337                    if tick_cancel.is_cancelled() {
338                        break;
339                    }
340                    if tick_tx.send(AppEvent::Tick).is_err() {
341                        break;
342                    }
343                }
344            });
345        }
346
347        // Render initial idle state (skip in debug mode).
348        if !self.config.debug {
349            self.render_display();
350        }
351
352        // Enter the main event loop.
353        self.run_event_loop().await;
354
355        Ok(())
356    }
357
358    /// Registers OS signal handlers that send `AppEvent::Shutdown` on SIGINT/SIGTERM.
359    fn register_signal_handlers(&self) {
360        let tx_sigint = self.event_tx.clone();
361        let tx_sigterm = self.event_tx.clone();
362
363        tokio::spawn(async move {
364            if let Ok(mut sig) = tokio::signal::unix::signal(
365                tokio::signal::unix::SignalKind::interrupt(),
366            ) {
367                if sig.recv().await.is_some() {
368                    let _ = tx_sigint.send(AppEvent::Shutdown);
369                }
370            }
371        });
372
373        tokio::spawn(async move {
374            if let Ok(mut sig) = tokio::signal::unix::signal(
375                tokio::signal::unix::SignalKind::terminate(),
376            ) {
377                if sig.recv().await.is_some() {
378                    let _ = tx_sigterm.send(AppEvent::Shutdown);
379                }
380            }
381        });
382    }
383
384    /// The main event loop: receives `AppEvent`s and dispatches them.
385    async fn run_event_loop(&mut self) {
386        loop {
387            let event = match self.event_rx.recv().await {
388                Some(e) => e,
389                None => break, // All senders dropped — exit.
390            };
391
392            match event {
393                AppEvent::Input(input_event) => {
394                    self.handle_input(input_event).await;
395                    if self.is_shutting_down {
396                        break;
397                    }
398                }
399
400                AppEvent::SseConnected => {
401                    // Update display to reflect connectivity.
402                    self.render_display();
403                }
404
405                AppEvent::SseDisconnected(reason) => {
406                    if let Some(msg) = reason {
407                        self.display.log(&format!("[voice] SSE disconnected: {}", msg));
408                    }
409                    self.render_display();
410                }
411
412                AppEvent::PermissionAsked(req) => {
413                    approval::handle_sse_permission_asked(self, req);
414                }
415
416                AppEvent::PermissionReplied {
417                    session_id,
418                    request_id,
419                    reply,
420                } => {
421                    approval::handle_sse_permission_replied(self, &session_id, &request_id, &reply);
422                }
423
424                AppEvent::QuestionAsked(req) => {
425                    approval::handle_sse_question_asked(self, req);
426                }
427
428                AppEvent::QuestionReplied {
429                    session_id,
430                    request_id,
431                    answers,
432                } => {
433                    approval::handle_sse_question_replied(self, &session_id, &request_id, answers);
434                }
435
436                AppEvent::QuestionRejected {
437                    session_id,
438                    request_id,
439                } => {
440                    approval::handle_sse_question_rejected(self, &session_id, &request_id);
441                }
442
443                AppEvent::SessionStatus { session_id, busy } => {
444                    approval::handle_sse_session_status(self, &session_id, busy);
445                }
446
447                AppEvent::AudioChunk { rms_energy } => {
448                    // Just store the level; the Tick timer handles rendering.
449                    if self.config.debug {
450                        self.debug_audio_counter += 1;
451                        if self.debug_audio_counter % 10 == 0 {
452                            self.debug_log(format_args!("audio level: {:.4}", rms_energy));
453                        }
454                    }
455                    self.current_level = Some(rms_energy);
456                }
457
458                AppEvent::Tick => {
459                    match self.state {
460                        RecordingState::Recording | RecordingState::Transcribing => {
461                            self.spinner_frame = self.spinner_frame.wrapping_add(1);
462                            self.render_display();
463                        }
464                        _ => {}
465                    }
466                }
467
468                AppEvent::RecoverFromError => {
469                    if self.state == RecordingState::Error {
470                        self.state = RecordingState::Idle;
471                        self.current_error = None;
472                        self.render_display();
473                    }
474                }
475
476                AppEvent::Shutdown => {
477                    self.shutdown();
478                    break;
479                }
480            }
481        }
482    }
483
484    /// Handles an [`InputEvent`] from keyboard or global hotkey.
485    async fn handle_input(&mut self, event: InputEvent) {
486        self.debug_log(format_args!("input: {:?}  state: {:?}", event, self.state));
487        match event {
488            InputEvent::Toggle => {
489                    if self.suppress_next_toggle {
490                        self.suppress_next_toggle = false;
491                    } else if self.config.push_to_talk {
492                        // Terminal keyboard (space) in PTT mode: route through
493                        // PTT start/stop so it works as a toggle.
494                        match self.state {
495                            RecordingState::Idle => {
496                                recording::handle_push_to_talk_start(self).await;
497                            }
498                            RecordingState::Recording => {
499                                recording::handle_push_to_talk_stop(self).await;
500                            }
501                            _ => {}
502                        }
503                    } else {
504                        recording::handle_toggle(self).await;
505                    }
506                }
507
508            InputEvent::KeyDown => {
509                if self.config.push_to_talk {
510                    recording::handle_push_to_talk_start(self).await;
511                }
512            }
513
514            InputEvent::KeyUp => {
515                if self.config.push_to_talk {
516                    self.suppress_next_toggle = true;
517                    recording::handle_push_to_talk_stop(self).await;
518                }
519            }
520
521            InputEvent::Quit => {
522                self.shutdown();
523            }
524        }
525    }
526
527    /// Transitions to the Error state, updates the display, and schedules
528    /// automatic recovery after 3 seconds.
529    pub(crate) fn handle_error(&mut self, err: &str) {
530        self.state = RecordingState::Error;
531        self.current_error = Some(err.to_string());
532        self.render_display();
533
534        // Schedule recovery: after 3 seconds send RecoverFromError.
535        let tx = self.event_tx.clone();
536        tokio::spawn(async move {
537            tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
538            let _ = tx.send(AppEvent::RecoverFromError);
539        });
540    }
541
542    /// Shuts down the application.
543    ///
544    /// Guarded by `is_shutting_down` to prevent double-shutdown.
545    /// Cancels the global cancellation token and clears the display.
546    pub(crate) fn shutdown(&mut self) {
547        if self.is_shutting_down {
548            return;
549        }
550        self.is_shutting_down = true;
551
552        // Cancel all background tasks.
553        self.cancel.cancel();
554
555        // Clear the terminal display (skip in debug mode).
556        if !self.config.debug {
557            self.display.clear();
558        }
559
560        eprintln!("[voice] Shutting down.");
561    }
562
563    /// Renders the current state to the terminal display.
564    /// No-op in debug mode.
565    pub(crate) fn render_display(&mut self) {
566        if self.config.debug {
567            return;
568        }
569        let toggle_key_str = format_toggle_key(self.config.toggle_key);
570        let approval = self.approval_queue.peek();
571        let approval_count = self.approval_queue.len();
572
573        // Read live duration from the active recorder.
574        let duration = self.recorder.as_ref().map(|r| r.duration());
575
576        // Convert RMS to a perceptual (logarithmic) display level.
577        // Raw RMS varies wildly across mic/OS/gain setups (0.001–0.3).
578        // Linear scaling breaks for quiet mics. dB scale works universally:
579        //   -60 dB (silence) → 0.0,  0 dB (full-scale) → 1.0
580        let display_level = self.current_level.map(|l| {
581            let db = 20.0 * l.max(1e-7).log10();
582            ((db + 60.0) / 60.0).clamp(0.0, 1.0)
583        });
584
585        let global_hotkey_name = if self.config.use_global_hotkey {
586            Some(self.config.global_hotkey.as_str())
587        } else {
588            None
589        };
590
591        let meta = DisplayMeta {
592            level: display_level,
593            error: self.current_error.as_deref(),
594            toggle_key: Some(&toggle_key_str),
595            global_hotkey_name,
596            approval,
597            approval_count: Some(approval_count),
598            transcript: self.last_transcript.as_deref(),
599            duration,
600            spinner_frame: self.spinner_frame,
601        };
602
603        self.display.update(self.state, &meta);
604    }
605}
606
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{AppConfig, ModelSize};
    use std::path::PathBuf;

    /// Builds a minimal `AppConfig` suitable for unit tests.
    ///
    /// Paths point at locations that do not exist so no Whisper model is
    /// ever loaded, and every optional subsystem (global hotkey, prompt
    /// handling, debug output) is switched off.
    fn test_config() -> AppConfig {
        AppConfig {
            whisper_model_path: PathBuf::from("/nonexistent/model.bin"),
            opencode_port: 4096,
            toggle_key: ' ',
            model_size: ModelSize::TinyEn,
            auto_submit: true,
            server_password: None,
            data_dir: PathBuf::from("/nonexistent/data"),
            audio_device: None,
            use_global_hotkey: false,
            global_hotkey: "right_option".to_string(),
            push_to_talk: true,
            handle_prompts: false,
            debug: false,
        }
    }

    /// Constructs a `VoiceApp` from [`test_config`], failing the test if
    /// construction does not succeed.
    fn test_app() -> VoiceApp {
        VoiceApp::new(test_config()).expect("VoiceApp::new should succeed")
    }

    #[test]
    fn test_format_toggle_key_names_space() {
        assert_eq!(format_toggle_key(' '), "Space");
    }

    #[test]
    fn test_format_toggle_key_passes_other_keys_through() {
        assert_eq!(format_toggle_key('r'), "r");
    }

    #[test]
    fn test_voice_app_new_initializes_idle_state() {
        let app = test_app();
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[test]
    fn test_voice_app_new_whisper_none_when_model_missing() {
        let app = test_app();
        // Model path is /nonexistent/model.bin — should not be loaded.
        assert!(app.whisper.is_none());
    }

    #[test]
    fn test_voice_app_new_approval_queue_empty() {
        let app = test_app();
        assert!(!app.approval_queue.has_pending());
    }

    #[test]
    fn test_voice_app_new_not_shutting_down() {
        let app = test_app();
        assert!(!app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_sets_flag() {
        let mut app = test_app();
        app.shutdown();
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_idempotent() {
        let mut app = test_app();
        app.shutdown();
        app.shutdown(); // Second call must not panic.
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_cancels_token() {
        let mut app = test_app();
        assert!(!app.cancel.is_cancelled());
        app.shutdown();
        assert!(app.cancel.is_cancelled());
    }

    // Needs a tokio runtime because `handle_error` spawns the recovery task.
    #[tokio::test]
    async fn test_handle_error_sets_error_state() {
        let mut app = test_app();
        app.handle_error("test error");
        assert_eq!(app.state, RecordingState::Error);
        assert_eq!(app.current_error.as_deref(), Some("test error"));
    }

    #[tokio::test]
    async fn test_recover_from_error_transitions_to_idle() {
        let mut app = test_app();
        app.state = RecordingState::Error;
        app.current_error = Some("some error".to_string());

        // Drive the real event loop: queue the recovery event followed by
        // Shutdown so the loop processes both and then terminates.
        assert!(app.event_tx.send(AppEvent::RecoverFromError).is_ok());
        assert!(app.event_tx.send(AppEvent::Shutdown).is_ok());
        app.run_event_loop().await;

        assert_eq!(app.state, RecordingState::Idle);
        assert!(app.current_error.is_none());
    }

    #[tokio::test]
    async fn test_recover_from_error_ignored_when_not_in_error_state() {
        let mut app = test_app();
        app.state = RecordingState::Recording;

        // A stale recovery event must not disturb a non-error state.
        assert!(app.event_tx.send(AppEvent::RecoverFromError).is_ok());
        assert!(app.event_tx.send(AppEvent::Shutdown).is_ok());
        app.run_event_loop().await;

        assert_eq!(app.state, RecordingState::Recording);
    }

    #[test]
    fn test_voice_app_event_channel_works() {
        let app = test_app();
        // Sending an event should succeed while the receiver is alive.
        let result = app.event_tx.send(AppEvent::Shutdown);
        assert!(result.is_ok());
    }
}