Skip to main content

opencode_voice/app/
mod.rs

1//! Main application orchestrator for OpenCode Voice Mode.
2//!
3//! This module owns the [`VoiceApp`] struct, which wires together all subsystems:
4//! audio capture, transcription, keyboard/hotkey input, SSE event streaming,
5//! approval queue, and the terminal display.
6
7pub mod recording;
8pub mod approval;
9
10use anyhow::Result;
11use std::time::Instant;
12use tokio_util::sync::CancellationToken;
13
14use crate::audio::capture::CpalRecorder;
15use crate::audio::{default_audio_config, AudioConfig};
16use crate::bridge::client::OpenCodeBridge;
17use crate::bridge::events::{OpenCodeEvents, SseEvent};
18use crate::approval::queue::ApprovalQueue;
19use crate::config::AppConfig;
20use crate::input::hotkey::GlobalHotkey;
21use crate::input::keyboard::{is_tty, KeyboardInput};
22use crate::state::{AppEvent, InputEvent, RecordingState};
23use crate::transcribe::engine::WhisperEngine;
24use crate::transcribe::setup::is_whisper_ready;
25use crate::ui::display::{Display, DisplayMeta};
26
27/// The central application struct that owns all subsystem state.
28///
29/// Fields are `pub(crate)` so that `recording` and `approval` submodules can
30/// access them directly without going through getter methods.
31pub struct VoiceApp {
32    /// Resolved application configuration.
33    pub(crate) config: AppConfig,
34
35    /// Current recording state machine state.
36    pub(crate) state: RecordingState,
37
38    /// Terminal display renderer.
39    pub(crate) display: Display,
40
41    /// HTTP client for the OpenCode API.
42    pub(crate) bridge: OpenCodeBridge,
43
44    /// Loaded Whisper transcription engine (None if model not ready).
45    pub(crate) whisper: Option<WhisperEngine>,
46
47    /// FIFO queue for pending permission/question approvals.
48    pub(crate) approval_queue: ApprovalQueue,
49
50    /// Active cpal recorder during push-to-talk recording (None when idle).
51    ///
52    /// Stored here so that `handle_push_to_talk_start` and
53    /// `handle_push_to_talk_stop` can share ownership across two separate
54    /// async calls without moving the recorder across threads.
55    pub(crate) recorder: Option<CpalRecorder>,
56
57    /// Audio configuration derived from app config.
58    pub(crate) audio_config: AudioConfig,
59
60    /// Main event channel sender — cloned and given to input/SSE tasks.
61    pub(crate) event_tx: tokio::sync::mpsc::UnboundedSender<AppEvent>,
62
63    /// Main event channel receiver — consumed by the event loop.
64    event_rx: tokio::sync::mpsc::UnboundedReceiver<AppEvent>,
65
66    /// Cancellation token broadcast to all background tasks.
67    pub(crate) cancel: CancellationToken,
68
69    /// Guard against double-shutdown.
70    is_shutting_down: bool,
71
72    /// Last transcription text shown in the idle display.
73    pub(crate) last_transcript: Option<String>,
74
75    /// Current audio level (0.0–1.0) for the level meter.
76    pub(crate) current_level: Option<f32>,
77
78    /// Current error message for the error display.
79    pub(crate) current_error: Option<String>,
80
81    /// Throttle: last time we rendered an AudioChunk update.
82    last_audio_render: Option<Instant>,
83}
84
85impl VoiceApp {
86    /// Creates a new `VoiceApp` from the given configuration.
87    ///
88    /// Loads the Whisper engine synchronously (blocking) before entering the
89    /// async context.  If the model is not yet downloaded the engine is set to
90    /// `None` and a warning is printed; the app will still start but
91    /// transcription will be unavailable until the model is downloaded.
92    pub fn new(config: AppConfig) -> Result<Self> {
93        // Load WhisperEngine synchronously (blocking) before entering async.
94        // This is intentional: whisper-rs model loading is CPU-bound and must
95        // not block the tokio runtime.
96        let whisper = if is_whisper_ready(&config.data_dir, &config.model_size) {
97            match WhisperEngine::new(&config.whisper_model_path) {
98                Ok(engine) => {
99                    Some(engine)
100                }
101                Err(e) => {
102                    eprintln!("[voice] Warning: failed to load Whisper model: {}", e);
103                    None
104                }
105            }
106        } else {
107            None
108        };
109
110        let bridge = OpenCodeBridge::new(
111            "http://localhost",
112            config.opencode_port,
113            config.server_password.clone(),
114        );
115
116        let audio_config = AudioConfig {
117            device: config.audio_device.clone(),
118            ..default_audio_config()
119        };
120
121        let (event_tx, event_rx) = tokio::sync::mpsc::unbounded_channel::<AppEvent>();
122
123        Ok(VoiceApp {
124            config,
125            state: RecordingState::Idle,
126            display: Display::new(),
127            bridge,
128            whisper,
129            approval_queue: ApprovalQueue::new(),
130            audio_config,
131            recorder: None,
132            event_tx,
133            event_rx,
134            cancel: CancellationToken::new(),
135            is_shutting_down: false,
136            last_transcript: None,
137            current_level: None,
138            current_error: None,
139            last_audio_render: None,
140        })
141    }
142
143    /// Starts the application: spawns background tasks and enters the event loop.
144    ///
145    /// This method does not return until the application shuts down.
146    pub async fn start(&mut self) -> Result<()> {
147        // Warn if Whisper model is not ready.
148        if self.whisper.is_none() {
149            eprintln!(
150                "[voice] Warning: Whisper model not found. Run 'opencode-voice setup' to download it."
151            );
152        }
153
154        // Warn (non-fatal) if OpenCode is not reachable.
155        if !self.bridge.is_connected().await {
156            eprintln!(
157                "[voice] Warning: Cannot connect to OpenCode at port {}. \
158                 Make sure OpenCode is running with --port {}.",
159                self.config.opencode_port, self.config.opencode_port
160            );
161        }
162
163        // Show welcome banner.
164        self.display.show_welcome(
165            &self.config.toggle_key.to_string(),
166            self.config.use_global_hotkey,
167            &self.config.global_hotkey,
168            self.config.push_to_talk,
169        );
170
171        // Spawn keyboard input on a dedicated OS thread (crossterm poll loop is blocking).
172        if is_tty() {
173            let kb_sender = self.event_tx.clone();
174            let kb_cancel = self.cancel.clone();
175            let toggle_key = self.config.toggle_key;
176
177            // We need an UnboundedSender<InputEvent> to pass to KeyboardInput.
178            // Bridge it through a small forwarding task.
179            let (input_tx, mut input_rx) =
180                tokio::sync::mpsc::unbounded_channel::<InputEvent>();
181
182            let event_tx_fwd = self.event_tx.clone();
183            tokio::spawn(async move {
184                while let Some(ev) = input_rx.recv().await {
185                    let _ = event_tx_fwd.send(AppEvent::Input(ev));
186                }
187            });
188
189            std::thread::spawn(move || {
190                let kb = KeyboardInput::new(toggle_key, input_tx, kb_cancel);
191                if let Err(e) = kb.run() {
192                    eprintln!("[voice] Keyboard input error: {}", e);
193                }
194                // Send Quit when the keyboard thread exits so the event loop
195                // knows to shut down.
196                let _ = kb_sender.send(AppEvent::Input(InputEvent::Quit));
197            });
198        }
199
200        // Spawn global hotkey on a dedicated OS thread (rdev::listen blocks).
201        if self.config.use_global_hotkey {
202            let hotkey_name = self.config.global_hotkey.clone();
203            let cancel = self.cancel.clone();
204
205            let (hotkey_tx, mut hotkey_rx) =
206                tokio::sync::mpsc::unbounded_channel::<InputEvent>();
207
208            let event_tx_fwd = self.event_tx.clone();
209            tokio::spawn(async move {
210                while let Some(ev) = hotkey_rx.recv().await {
211                    let _ = event_tx_fwd.send(AppEvent::Input(ev));
212                }
213            });
214
215            match GlobalHotkey::new(&hotkey_name, hotkey_tx, cancel) {
216                Ok(hotkey) => {
217                    std::thread::spawn(move || {
218                        if let Err(e) = hotkey.run() {
219                            eprintln!("[voice] Global hotkey error: {}", e);
220                        }
221                    });
222                }
223                Err(e) => {
224                    eprintln!("[voice] Warning: Could not set up global hotkey: {}", e);
225                }
226            }
227        }
228
229        // Spawn SSE event bridge if approval mode is enabled.
230        if self.config.approval_mode {
231            let (sse_tx, mut sse_rx) =
232                tokio::sync::mpsc::unbounded_channel::<SseEvent>();
233
234            let sse_client = OpenCodeEvents::new(
235                self.bridge.get_base_url().to_string(),
236                self.config.server_password.clone(),
237                sse_tx,
238            );
239            sse_client.start(self.cancel.clone());
240
241            // Forward SseEvent → AppEvent on a tokio task.
242            let event_tx_fwd = self.event_tx.clone();
243            tokio::spawn(async move {
244                while let Some(sse_event) = sse_rx.recv().await {
245                    let app_event = match sse_event {
246                        SseEvent::Connected => AppEvent::SseConnected,
247                        SseEvent::Disconnected(reason) => AppEvent::SseDisconnected(reason),
248                        SseEvent::PermissionAsked(req) => AppEvent::PermissionAsked(req),
249                        SseEvent::PermissionReplied {
250                            session_id,
251                            request_id,
252                            reply,
253                        } => AppEvent::PermissionReplied {
254                            session_id,
255                            request_id,
256                            reply,
257                        },
258                        SseEvent::QuestionAsked(req) => AppEvent::QuestionAsked(req),
259                        SseEvent::QuestionReplied {
260                            session_id,
261                            request_id,
262                            answers,
263                        } => AppEvent::QuestionReplied {
264                            session_id,
265                            request_id,
266                            answers,
267                        },
268                        SseEvent::QuestionRejected {
269                            session_id,
270                            request_id,
271                        } => AppEvent::QuestionRejected {
272                            session_id,
273                            request_id,
274                        },
275                        SseEvent::SessionStatus { session_id, busy } => {
276                            AppEvent::SessionStatus { session_id, busy }
277                        }
278                    };
279                    if event_tx_fwd.send(app_event).is_err() {
280                        break;
281                    }
282                }
283            });
284        }
285
286        // Register SIGINT / SIGTERM signal handlers.
287        self.register_signal_handlers();
288
289        // Render initial idle state.
290        self.render_display();
291
292        // Enter the main event loop.
293        self.run_event_loop().await;
294
295        Ok(())
296    }
297
298    /// Registers OS signal handlers that send `AppEvent::Shutdown` on SIGINT/SIGTERM.
299    fn register_signal_handlers(&self) {
300        let tx_sigint = self.event_tx.clone();
301        let tx_sigterm = self.event_tx.clone();
302
303        tokio::spawn(async move {
304            if let Ok(mut sig) = tokio::signal::unix::signal(
305                tokio::signal::unix::SignalKind::interrupt(),
306            ) {
307                if sig.recv().await.is_some() {
308                    let _ = tx_sigint.send(AppEvent::Shutdown);
309                }
310            }
311        });
312
313        tokio::spawn(async move {
314            if let Ok(mut sig) = tokio::signal::unix::signal(
315                tokio::signal::unix::SignalKind::terminate(),
316            ) {
317                if sig.recv().await.is_some() {
318                    let _ = tx_sigterm.send(AppEvent::Shutdown);
319                }
320            }
321        });
322    }
323
324    /// The main event loop: receives `AppEvent`s and dispatches them.
325    async fn run_event_loop(&mut self) {
326        loop {
327            let event = match self.event_rx.recv().await {
328                Some(e) => e,
329                None => break, // All senders dropped — exit.
330            };
331
332            match event {
333                AppEvent::Input(input_event) => {
334                    self.handle_input(input_event).await;
335                    if self.is_shutting_down {
336                        break;
337                    }
338                }
339
340                AppEvent::SseConnected => {
341                    // Update display to reflect connectivity.
342                    self.render_display();
343                }
344
345                AppEvent::SseDisconnected(reason) => {
346                    if let Some(msg) = reason {
347                        self.display.log(&format!("[voice] SSE disconnected: {}", msg));
348                    }
349                    self.render_display();
350                }
351
352                AppEvent::PermissionAsked(req) => {
353                    approval::handle_sse_permission_asked(self, req);
354                }
355
356                AppEvent::PermissionReplied {
357                    session_id,
358                    request_id,
359                    reply,
360                } => {
361                    approval::handle_sse_permission_replied(self, &session_id, &request_id, &reply);
362                }
363
364                AppEvent::QuestionAsked(req) => {
365                    approval::handle_sse_question_asked(self, req);
366                }
367
368                AppEvent::QuestionReplied {
369                    session_id,
370                    request_id,
371                    answers,
372                } => {
373                    approval::handle_sse_question_replied(self, &session_id, &request_id, answers);
374                }
375
376                AppEvent::QuestionRejected {
377                    session_id,
378                    request_id,
379                } => {
380                    approval::handle_sse_question_rejected(self, &session_id, &request_id);
381                }
382
383                AppEvent::SessionStatus { session_id, busy } => {
384                    approval::handle_sse_session_status(self, &session_id, busy);
385                }
386
387                AppEvent::AudioChunk { rms_energy } => {
388                    self.current_level = Some(rms_energy);
389                    // Throttle display updates to ~10 fps to avoid spammy output.
390                    let now = Instant::now();
391                    let should_render = self
392                        .last_audio_render
393                        .map(|t| now.duration_since(t).as_millis() >= 100)
394                        .unwrap_or(true);
395                    if should_render {
396                        self.last_audio_render = Some(now);
397                        self.render_display();
398                    }
399                }
400
401                AppEvent::RecoverFromError => {
402                    if self.state == RecordingState::Error {
403                        self.state = RecordingState::Idle;
404                        self.current_error = None;
405                        self.render_display();
406                    }
407                }
408
409                AppEvent::Shutdown => {
410                    self.shutdown();
411                    break;
412                }
413            }
414        }
415    }
416
417    /// Handles an [`InputEvent`] from keyboard or global hotkey.
418    async fn handle_input(&mut self, event: InputEvent) {
419        match event {
420            InputEvent::Toggle => {
421                if self.config.push_to_talk {
422                    // PTT is driven by KeyDown/KeyUp; ignore Toggle to prevent
423                    // the global hotkey's KeyRelease (which sends both KeyUp
424                    // and Toggle) from re-starting recording after KeyUp
425                    // already stopped it.
426                } else {
427                    // Standard toggle mode.
428                    recording::handle_toggle(self).await;
429                }
430            }
431
432            InputEvent::KeyDown => {
433                if self.config.push_to_talk {
434                    recording::handle_push_to_talk_start(self).await;
435                }
436            }
437
438            InputEvent::KeyUp => {
439                if self.config.push_to_talk {
440                    recording::handle_push_to_talk_stop(self).await;
441                }
442            }
443
444            InputEvent::Quit => {
445                self.shutdown();
446            }
447        }
448    }
449
450    /// Transitions to the Error state, updates the display, and schedules
451    /// automatic recovery after 3 seconds.
452    pub(crate) fn handle_error(&mut self, err: &str) {
453        self.state = RecordingState::Error;
454        self.current_error = Some(err.to_string());
455        self.render_display();
456
457        // Schedule recovery: after 3 seconds send RecoverFromError.
458        let tx = self.event_tx.clone();
459        tokio::spawn(async move {
460            tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
461            let _ = tx.send(AppEvent::RecoverFromError);
462        });
463    }
464
465    /// Shuts down the application.
466    ///
467    /// Guarded by `is_shutting_down` to prevent double-shutdown.
468    /// Cancels the global cancellation token and clears the display.
469    pub(crate) fn shutdown(&mut self) {
470        if self.is_shutting_down {
471            return;
472        }
473        self.is_shutting_down = true;
474
475        // Cancel all background tasks.
476        self.cancel.cancel();
477
478        // Clear the terminal display.
479        self.display.clear();
480
481        eprintln!("[voice] Shutting down.");
482    }
483
484    /// Renders the current state to the terminal display.
485    pub(crate) fn render_display(&mut self) {
486        let toggle_key_str = self.config.toggle_key.to_string();
487        let approval = self.approval_queue.peek();
488        let approval_count = self.approval_queue.len();
489
490        // Read live duration from the active recorder.
491        let duration = self.recorder.as_ref().map(|r| r.duration());
492
493        // Amplify level for display — raw RMS from speech is typically
494        // 0.01–0.1, which would render as an empty bar at width 8.
495        let display_level = self.current_level.map(|l| (l * 5.0).min(1.0));
496
497        let global_hotkey_name = if self.config.use_global_hotkey {
498            Some(self.config.global_hotkey.as_str())
499        } else {
500            None
501        };
502
503        let meta = DisplayMeta {
504            level: display_level,
505            error: self.current_error.as_deref(),
506            toggle_key: Some(&toggle_key_str),
507            global_hotkey_name,
508            approval,
509            approval_count: Some(approval_count),
510            transcript: self.last_transcript.as_deref(),
511            duration,
512        };
513
514        self.display.update(self.state, &meta);
515    }
516}
517
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{AppConfig, ModelSize};
    use std::path::PathBuf;

    /// Builds a minimal `AppConfig` suitable for unit tests.
    ///
    /// Paths point at locations that do not exist, so no Whisper model is
    /// ever loaded, and approval mode and the global hotkey are disabled.
    fn test_config() -> AppConfig {
        AppConfig {
            whisper_model_path: PathBuf::from("/nonexistent/model.bin"),
            opencode_port: 4096,
            toggle_key: ' ',
            model_size: ModelSize::TinyEn,
            auto_submit: true,
            server_password: None,
            data_dir: PathBuf::from("/nonexistent/data"),
            audio_device: None,
            use_global_hotkey: false,
            global_hotkey: "right_option".to_string(),
            push_to_talk: true,
            approval_mode: false,
        }
    }

    /// Builds a `VoiceApp` from [`test_config`], failing the test if
    /// construction does not succeed.
    fn test_app() -> VoiceApp {
        VoiceApp::new(test_config()).expect("VoiceApp::new should succeed")
    }

    #[test]
    fn test_voice_app_new_initializes_idle_state() {
        let app = test_app();
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[test]
    fn test_voice_app_new_whisper_none_when_model_missing() {
        let app = test_app();
        // Model path is /nonexistent/model.bin — should not be loaded.
        assert!(app.whisper.is_none());
    }

    #[test]
    fn test_voice_app_new_approval_queue_empty() {
        let app = test_app();
        assert!(!app.approval_queue.has_pending());
    }

    #[test]
    fn test_voice_app_new_not_shutting_down() {
        let app = test_app();
        assert!(!app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_sets_flag() {
        let mut app = test_app();
        app.shutdown();
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_idempotent() {
        let mut app = test_app();
        app.shutdown();
        app.shutdown(); // Second call must not panic.
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_cancels_token() {
        let mut app = test_app();
        assert!(!app.cancel.is_cancelled());
        app.shutdown();
        assert!(app.cancel.is_cancelled());
    }

    #[tokio::test]
    async fn test_handle_error_sets_error_state() {
        let mut app = test_app();
        app.handle_error("test error");
        assert_eq!(app.state, RecordingState::Error);
        assert_eq!(app.current_error.as_deref(), Some("test error"));
    }

    #[tokio::test]
    async fn test_recover_from_error_transitions_to_idle() {
        let mut app = test_app();
        app.state = RecordingState::Error;
        app.current_error = Some("some error".to_string());

        // Drive the real event loop rather than re-implementing its logic
        // here: queue the recovery event, then a shutdown so the loop returns.
        assert!(app.event_tx.send(AppEvent::RecoverFromError).is_ok());
        assert!(app.event_tx.send(AppEvent::Shutdown).is_ok());
        app.run_event_loop().await;

        assert_eq!(app.state, RecordingState::Idle);
        assert!(app.current_error.is_none());
    }

    #[test]
    fn test_voice_app_event_channel_works() {
        let app = test_app();
        // Sending an event should succeed while the receiver is alive.
        let result = app.event_tx.send(AppEvent::Shutdown);
        assert!(result.is_ok());
    }
}