Skip to main content

opencode_voice/app/
mod.rs

1//! Main application orchestrator for OpenCode Voice Mode.
2//!
3//! This module owns the [`VoiceApp`] struct, which wires together all subsystems:
4//! audio capture, transcription, keyboard/hotkey input, SSE event streaming,
5//! approval queue, and the terminal display.
6
7pub mod recording;
8pub mod approval;
9
10use anyhow::Result;
11use std::time::Instant;
12use tokio_util::sync::CancellationToken;
13
14use crate::audio::capture::CpalRecorder;
15use crate::audio::{default_audio_config, AudioConfig};
16use crate::bridge::client::OpenCodeBridge;
17use crate::bridge::events::{OpenCodeEvents, SseEvent};
18use crate::approval::queue::ApprovalQueue;
19use crate::config::AppConfig;
20use crate::input::hotkey::GlobalHotkey;
21use crate::input::keyboard::{is_tty, KeyboardInput};
22use crate::state::{AppEvent, InputEvent, RecordingState};
23use crate::transcribe::engine::WhisperEngine;
24use crate::transcribe::setup::is_whisper_ready;
25use crate::ui::display::{Display, DisplayMeta};
26
27/// The central application struct that owns all subsystem state.
28///
29/// Fields are `pub(crate)` so that `recording` and `approval` submodules can
30/// access them directly without going through getter methods.
31pub struct VoiceApp {
32    /// Resolved application configuration.
33    pub(crate) config: AppConfig,
34
35    /// Current recording state machine state.
36    pub(crate) state: RecordingState,
37
38    /// Terminal display renderer.
39    pub(crate) display: Display,
40
41    /// HTTP client for the OpenCode API.
42    pub(crate) bridge: OpenCodeBridge,
43
44    /// Loaded Whisper transcription engine (None if model not ready).
45    pub(crate) whisper: Option<WhisperEngine>,
46
47    /// FIFO queue for pending permission/question approvals.
48    pub(crate) approval_queue: ApprovalQueue,
49
50    /// Active cpal recorder during push-to-talk recording (None when idle).
51    ///
52    /// Stored here so that `handle_push_to_talk_start` and
53    /// `handle_push_to_talk_stop` can share ownership across two separate
54    /// async calls without moving the recorder across threads.
55    pub(crate) recorder: Option<CpalRecorder>,
56
57    /// Audio configuration derived from app config.
58    pub(crate) audio_config: AudioConfig,
59
60    /// Main event channel sender — cloned and given to input/SSE tasks.
61    pub(crate) event_tx: tokio::sync::mpsc::UnboundedSender<AppEvent>,
62
63    /// Main event channel receiver — consumed by the event loop.
64    event_rx: tokio::sync::mpsc::UnboundedReceiver<AppEvent>,
65
66    /// Cancellation token broadcast to all background tasks.
67    pub(crate) cancel: CancellationToken,
68
69    /// Guard against double-shutdown.
70    is_shutting_down: bool,
71
72    /// Last transcription text shown in the idle display.
73    pub(crate) last_transcript: Option<String>,
74
75    /// Current audio level (0.0–1.0) for the level meter.
76    pub(crate) current_level: Option<f32>,
77
78    /// Current error message for the error display.
79    pub(crate) current_error: Option<String>,
80
81    /// Throttle: last time we rendered an AudioChunk update.
82    last_audio_render: Option<Instant>,
83}
84
85impl VoiceApp {
86    /// Creates a new `VoiceApp` from the given configuration.
87    ///
88    /// Loads the Whisper engine synchronously (blocking) before entering the
89    /// async context.  If the model is not yet downloaded the engine is set to
90    /// `None` and a warning is printed; the app will still start but
91    /// transcription will be unavailable until the model is downloaded.
92    pub fn new(config: AppConfig) -> Result<Self> {
93        // Load WhisperEngine synchronously (blocking) before entering async.
94        // This is intentional: whisper-rs model loading is CPU-bound and must
95        // not block the tokio runtime.
96        let whisper = if is_whisper_ready(&config.data_dir, &config.model_size) {
97            match WhisperEngine::new(&config.whisper_model_path) {
98                Ok(engine) => {
99                    Some(engine)
100                }
101                Err(e) => {
102                    eprintln!("[voice] Warning: failed to load Whisper model: {}", e);
103                    None
104                }
105            }
106        } else {
107            None
108        };
109
110        let bridge = OpenCodeBridge::new(
111            "http://localhost",
112            config.opencode_port,
113            config.server_password.clone(),
114        );
115
116        let audio_config = AudioConfig {
117            device: config.audio_device.clone(),
118            ..default_audio_config()
119        };
120
121        let (event_tx, event_rx) = tokio::sync::mpsc::unbounded_channel::<AppEvent>();
122
123        Ok(VoiceApp {
124            config,
125            state: RecordingState::Idle,
126            display: Display::new(),
127            bridge,
128            whisper,
129            approval_queue: ApprovalQueue::new(),
130            audio_config,
131            recorder: None,
132            event_tx,
133            event_rx,
134            cancel: CancellationToken::new(),
135            is_shutting_down: false,
136            last_transcript: None,
137            current_level: None,
138            current_error: None,
139            last_audio_render: None,
140        })
141    }
142
143    /// Starts the application: spawns background tasks and enters the event loop.
144    ///
145    /// This method does not return until the application shuts down.
146    pub async fn start(&mut self) -> Result<()> {
147        // Warn if Whisper model is not ready.
148        if self.whisper.is_none() {
149            eprintln!(
150                "[voice] Warning: Whisper model not found. Run 'opencode-voice setup' to download it."
151            );
152        }
153
154        // Warn (non-fatal) if OpenCode is not reachable.
155        if !self.bridge.is_connected().await {
156            eprintln!(
157                "[voice] Warning: Cannot connect to OpenCode at port {}. \
158                 Make sure OpenCode is running with --port {}.",
159                self.config.opencode_port, self.config.opencode_port
160            );
161        }
162
163        // Show welcome banner.
164        self.display.show_welcome(
165            &self.config.toggle_key.to_string(),
166            self.config.use_global_hotkey,
167            &self.config.global_hotkey,
168            self.config.push_to_talk,
169        );
170
171        // Spawn keyboard input on a dedicated OS thread (crossterm poll loop is blocking).
172        if is_tty() {
173            let kb_sender = self.event_tx.clone();
174            let kb_cancel = self.cancel.clone();
175            let toggle_key = self.config.toggle_key;
176
177            // We need an UnboundedSender<InputEvent> to pass to KeyboardInput.
178            // Bridge it through a small forwarding task.
179            let (input_tx, mut input_rx) =
180                tokio::sync::mpsc::unbounded_channel::<InputEvent>();
181
182            let event_tx_fwd = self.event_tx.clone();
183            tokio::spawn(async move {
184                while let Some(ev) = input_rx.recv().await {
185                    let _ = event_tx_fwd.send(AppEvent::Input(ev));
186                }
187            });
188
189            std::thread::spawn(move || {
190                let kb = KeyboardInput::new(toggle_key, input_tx, kb_cancel);
191                if let Err(e) = kb.run() {
192                    eprintln!("[voice] Keyboard input error: {}", e);
193                }
194                // Send Quit when the keyboard thread exits so the event loop
195                // knows to shut down.
196                let _ = kb_sender.send(AppEvent::Input(InputEvent::Quit));
197            });
198        }
199
200        // Spawn global hotkey on a dedicated OS thread (rdev::listen blocks).
201        if self.config.use_global_hotkey {
202            let hotkey_name = self.config.global_hotkey.clone();
203            let cancel = self.cancel.clone();
204
205            let (hotkey_tx, mut hotkey_rx) =
206                tokio::sync::mpsc::unbounded_channel::<InputEvent>();
207
208            let event_tx_fwd = self.event_tx.clone();
209            tokio::spawn(async move {
210                while let Some(ev) = hotkey_rx.recv().await {
211                    let _ = event_tx_fwd.send(AppEvent::Input(ev));
212                }
213            });
214
215            match GlobalHotkey::new(&hotkey_name, hotkey_tx, cancel) {
216                Ok(hotkey) => {
217                    std::thread::spawn(move || {
218                        if let Err(e) = hotkey.run() {
219                            eprintln!("[voice] Global hotkey error: {}", e);
220                        }
221                    });
222                }
223                Err(e) => {
224                    eprintln!("[voice] Warning: Could not set up global hotkey: {}", e);
225                }
226            }
227        }
228
229        // Spawn SSE event bridge if approval mode is enabled.
230        if self.config.approval_mode {
231            let (sse_tx, mut sse_rx) =
232                tokio::sync::mpsc::unbounded_channel::<SseEvent>();
233
234            let sse_client = OpenCodeEvents::new(
235                self.bridge.get_base_url().to_string(),
236                self.config.server_password.clone(),
237                sse_tx,
238            );
239            sse_client.start(self.cancel.clone());
240
241            // Forward SseEvent → AppEvent on a tokio task.
242            let event_tx_fwd = self.event_tx.clone();
243            tokio::spawn(async move {
244                while let Some(sse_event) = sse_rx.recv().await {
245                    let app_event = match sse_event {
246                        SseEvent::Connected => AppEvent::SseConnected,
247                        SseEvent::Disconnected(reason) => AppEvent::SseDisconnected(reason),
248                        SseEvent::PermissionAsked(req) => AppEvent::PermissionAsked(req),
249                        SseEvent::PermissionReplied {
250                            session_id,
251                            request_id,
252                            reply,
253                        } => AppEvent::PermissionReplied {
254                            session_id,
255                            request_id,
256                            reply,
257                        },
258                        SseEvent::QuestionAsked(req) => AppEvent::QuestionAsked(req),
259                        SseEvent::QuestionReplied {
260                            session_id,
261                            request_id,
262                            answers,
263                        } => AppEvent::QuestionReplied {
264                            session_id,
265                            request_id,
266                            answers,
267                        },
268                        SseEvent::QuestionRejected {
269                            session_id,
270                            request_id,
271                        } => AppEvent::QuestionRejected {
272                            session_id,
273                            request_id,
274                        },
275                    };
276                    if event_tx_fwd.send(app_event).is_err() {
277                        break;
278                    }
279                }
280            });
281        }
282
283        // Register SIGINT / SIGTERM signal handlers.
284        self.register_signal_handlers();
285
286        // Render initial idle state.
287        self.render_display();
288
289        // Enter the main event loop.
290        self.run_event_loop().await;
291
292        Ok(())
293    }
294
295    /// Registers OS signal handlers that send `AppEvent::Shutdown` on SIGINT/SIGTERM.
296    fn register_signal_handlers(&self) {
297        let tx_sigint = self.event_tx.clone();
298        let tx_sigterm = self.event_tx.clone();
299
300        tokio::spawn(async move {
301            if let Ok(mut sig) = tokio::signal::unix::signal(
302                tokio::signal::unix::SignalKind::interrupt(),
303            ) {
304                if sig.recv().await.is_some() {
305                    let _ = tx_sigint.send(AppEvent::Shutdown);
306                }
307            }
308        });
309
310        tokio::spawn(async move {
311            if let Ok(mut sig) = tokio::signal::unix::signal(
312                tokio::signal::unix::SignalKind::terminate(),
313            ) {
314                if sig.recv().await.is_some() {
315                    let _ = tx_sigterm.send(AppEvent::Shutdown);
316                }
317            }
318        });
319    }
320
321    /// The main event loop: receives `AppEvent`s and dispatches them.
322    async fn run_event_loop(&mut self) {
323        loop {
324            let event = match self.event_rx.recv().await {
325                Some(e) => e,
326                None => break, // All senders dropped — exit.
327            };
328
329            match event {
330                AppEvent::Input(input_event) => {
331                    self.handle_input(input_event).await;
332                    if self.is_shutting_down {
333                        break;
334                    }
335                }
336
337                AppEvent::SseConnected => {
338                    // Update display to reflect connectivity.
339                    self.render_display();
340                }
341
342                AppEvent::SseDisconnected(reason) => {
343                    if let Some(msg) = reason {
344                        self.display.log(&format!("[voice] SSE disconnected: {}", msg));
345                    }
346                    self.render_display();
347                }
348
349                AppEvent::PermissionAsked(req) => {
350                    approval::handle_sse_permission_asked(self, req);
351                }
352
353                AppEvent::PermissionReplied {
354                    session_id,
355                    request_id,
356                    reply,
357                } => {
358                    approval::handle_sse_permission_replied(self, &session_id, &request_id, &reply);
359                }
360
361                AppEvent::QuestionAsked(req) => {
362                    approval::handle_sse_question_asked(self, req);
363                }
364
365                AppEvent::QuestionReplied {
366                    session_id,
367                    request_id,
368                    answers,
369                } => {
370                    approval::handle_sse_question_replied(self, &session_id, &request_id, answers);
371                }
372
373                AppEvent::QuestionRejected {
374                    session_id,
375                    request_id,
376                } => {
377                    approval::handle_sse_question_rejected(self, &session_id, &request_id);
378                }
379
380                AppEvent::AudioChunk { rms_energy } => {
381                    self.current_level = Some(rms_energy);
382                    // Throttle display updates to ~10 fps to avoid spammy output.
383                    let now = Instant::now();
384                    let should_render = self
385                        .last_audio_render
386                        .map(|t| now.duration_since(t).as_millis() >= 100)
387                        .unwrap_or(true);
388                    if should_render {
389                        self.last_audio_render = Some(now);
390                        self.render_display();
391                    }
392                }
393
394                AppEvent::RecoverFromError => {
395                    if self.state == RecordingState::Error {
396                        self.state = RecordingState::Idle;
397                        self.current_error = None;
398                        self.render_display();
399                    }
400                }
401
402                AppEvent::Shutdown => {
403                    self.shutdown();
404                    break;
405                }
406            }
407        }
408    }
409
410    /// Handles an [`InputEvent`] from keyboard or global hotkey.
411    async fn handle_input(&mut self, event: InputEvent) {
412        match event {
413            InputEvent::Toggle => {
414                if self.config.push_to_talk {
415                    // PTT is driven by KeyDown/KeyUp; ignore Toggle to prevent
416                    // the global hotkey's KeyRelease (which sends both KeyUp
417                    // and Toggle) from re-starting recording after KeyUp
418                    // already stopped it.
419                } else {
420                    // Standard toggle mode.
421                    recording::handle_toggle(self).await;
422                }
423            }
424
425            InputEvent::KeyDown => {
426                if self.config.push_to_talk {
427                    recording::handle_push_to_talk_start(self).await;
428                }
429            }
430
431            InputEvent::KeyUp => {
432                if self.config.push_to_talk {
433                    recording::handle_push_to_talk_stop(self).await;
434                }
435            }
436
437            InputEvent::Quit => {
438                self.shutdown();
439            }
440        }
441    }
442
443    /// Transitions to the Error state, updates the display, and schedules
444    /// automatic recovery after 3 seconds.
445    pub(crate) fn handle_error(&mut self, err: &str) {
446        self.state = RecordingState::Error;
447        self.current_error = Some(err.to_string());
448        self.render_display();
449
450        // Schedule recovery: after 3 seconds send RecoverFromError.
451        let tx = self.event_tx.clone();
452        tokio::spawn(async move {
453            tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
454            let _ = tx.send(AppEvent::RecoverFromError);
455        });
456    }
457
458    /// Shuts down the application.
459    ///
460    /// Guarded by `is_shutting_down` to prevent double-shutdown.
461    /// Cancels the global cancellation token and clears the display.
462    pub(crate) fn shutdown(&mut self) {
463        if self.is_shutting_down {
464            return;
465        }
466        self.is_shutting_down = true;
467
468        // Cancel all background tasks.
469        self.cancel.cancel();
470
471        // Clear the terminal display.
472        self.display.clear();
473
474        eprintln!("[voice] Shutting down.");
475    }
476
477    /// Renders the current state to the terminal display.
478    pub(crate) fn render_display(&mut self) {
479        let toggle_key_str = self.config.toggle_key.to_string();
480        let approval = self.approval_queue.peek();
481        let approval_count = self.approval_queue.len();
482
483        // Read live duration from the active recorder.
484        let duration = self.recorder.as_ref().map(|r| r.duration());
485
486        // Amplify level for display — raw RMS from speech is typically
487        // 0.01–0.1, which would render as an empty bar at width 8.
488        let display_level = self.current_level.map(|l| (l * 5.0).min(1.0));
489
490        let global_hotkey_name = if self.config.use_global_hotkey {
491            Some(self.config.global_hotkey.as_str())
492        } else {
493            None
494        };
495
496        let meta = DisplayMeta {
497            level: display_level,
498            error: self.current_error.as_deref(),
499            toggle_key: Some(&toggle_key_str),
500            global_hotkey_name,
501            approval,
502            approval_count: Some(approval_count),
503            transcript: self.last_transcript.as_deref(),
504            duration,
505        };
506
507        self.display.update(self.state, &meta);
508    }
509}
510
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{AppConfig, ModelSize};
    use std::path::PathBuf;

    /// Builds a minimal `AppConfig` suitable for unit tests.
    ///
    /// The model and data paths point at locations that do not exist, so no
    /// Whisper model is loaded.
    fn test_config() -> AppConfig {
        AppConfig {
            whisper_model_path: PathBuf::from("/nonexistent/model.bin"),
            opencode_port: 4096,
            toggle_key: ' ',
            model_size: ModelSize::TinyEn,
            auto_submit: true,
            server_password: None,
            data_dir: PathBuf::from("/nonexistent/data"),
            audio_device: None,
            use_global_hotkey: false,
            global_hotkey: "right_option".to_string(),
            push_to_talk: true,
            approval_mode: false,
        }
    }

    /// Constructs a `VoiceApp` from [`test_config`], failing the test if
    /// construction fails.
    fn new_app() -> VoiceApp {
        VoiceApp::new(test_config()).expect("VoiceApp::new should succeed")
    }

    #[test]
    fn test_voice_app_new_initializes_idle_state() {
        let app = new_app();
        assert_eq!(app.state, RecordingState::Idle);
    }

    #[test]
    fn test_voice_app_new_whisper_none_when_model_missing() {
        let app = new_app();
        // Model path is /nonexistent/model.bin — should not be loaded.
        assert!(app.whisper.is_none());
    }

    #[test]
    fn test_voice_app_new_approval_queue_empty() {
        let app = new_app();
        assert!(!app.approval_queue.has_pending());
    }

    #[test]
    fn test_voice_app_new_not_shutting_down() {
        let app = new_app();
        assert!(!app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_sets_flag() {
        let mut app = new_app();
        app.shutdown();
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_idempotent() {
        let mut app = new_app();
        app.shutdown();
        app.shutdown(); // Second call must not panic.
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_shutdown_cancels_token() {
        let mut app = new_app();
        assert!(!app.cancel.is_cancelled());
        app.shutdown();
        assert!(app.cancel.is_cancelled());
    }

    #[tokio::test]
    async fn test_handle_error_sets_error_state() {
        let mut app = new_app();
        app.handle_error("test error");
        assert_eq!(app.state, RecordingState::Error);
        assert_eq!(app.current_error.as_deref(), Some("test error"));
    }

    #[tokio::test]
    async fn test_recover_from_error_transitions_to_idle() {
        let mut app = new_app();
        app.state = RecordingState::Error;
        app.current_error = Some("some error".to_string());

        // Drive the real event loop: queue the recovery event followed by a
        // shutdown so the loop processes the former and then exits.
        assert!(app.event_tx.send(AppEvent::RecoverFromError).is_ok());
        assert!(app.event_tx.send(AppEvent::Shutdown).is_ok());
        app.run_event_loop().await;

        assert_eq!(app.state, RecordingState::Idle);
        assert!(app.current_error.is_none());
        assert!(app.is_shutting_down);
    }

    #[test]
    fn test_voice_app_event_channel_works() {
        let app = new_app();
        // Sending an event should succeed while the receiver is alive.
        let result = app.event_tx.send(AppEvent::Shutdown);
        assert!(result.is_ok());
    }
}