stt-cli 0.2.1 - Docs.rs

I'll provide code fragments and instructions to effectively integrate the new features into the codebase. Let's approach this methodically.

# Modularization and Implementation Approach

## 1. First, define a new configuration structure

```rust
// src/config.rs - New file

use clap::{Parser, ValueEnum};
use std::path::PathBuf;

// How transcription is activated; selected on the command line via `AppConfig::mode`.
//
// Plain `//` comments are used on purpose: clap's derives may pick up `///`
// comments as user-visible help text, which would change `--help` output.
#[derive(Debug, Clone, ValueEnum)]
pub enum TranscriptionMode {
    // Recording starts out active (main initialises the recording state to `true`).
    AlwaysOn,
    // Recording starts out paused and main sets up the hotkey service to toggle it.
    Hotkey,
}

// Command-line configuration for the application, parsed by clap's `Parser` derive.
//
// This header deliberately uses `//` rather than `///` so it cannot leak into the
// generated `about` text. The `///` comments on the fields are the per-flag help.
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None)]
pub struct AppConfig {
    /// Audio device name to use
    #[arg(short, long)]
    pub device: Option<String>,

    /// Transcription activation mode
    #[arg(short, long, value_enum, default_value = "always-on")]
    pub mode: TranscriptionMode,

    /// Hotkey for toggling recording (when in hotkey mode)
    #[arg(short = 'k', long, default_value = "ctrl+space")]
    pub hotkey: String,

    /// Directory to store data
    #[arg(long, default_value = "data_dir")]
    pub data_dir: PathBuf,

    /// Enable debug mode
    // An explicit short flag is required here: a bare `short` derives `-d` from the
    // field name, which collides with the `-d` already derived for `device`.
    #[arg(short = 'D', long)]
    pub debug: bool,
}

impl Default for AppConfig {
    /// Returns the configuration equivalent to running with no flags: no explicit
    /// device, always-on mode, the `ctrl+space` hotkey, the `data_dir` directory
    /// and debug disabled. The values mirror the `default_value`s declared on the
    /// struct's fields.
    fn default() -> Self {
        let hotkey = String::from("ctrl+space");
        let data_dir: PathBuf = "data_dir".into();

        Self {
            device: None,
            mode: TranscriptionMode::AlwaysOn,
            hotkey,
            data_dir,
            debug: false,
        }
    }
}
```

## 2. Enhance the state model with recording control

```rust
// src/audio_state.rs - New file

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

/// Shared on/off flag that says whether audio should currently be recorded.
///
/// Cloning is cheap and every clone observes the same flag, because the
/// underlying `AtomicBool` lives behind an `Arc`.
#[derive(Debug, Clone)]
pub struct RecordingState {
    active: Arc<AtomicBool>,
}

impl RecordingState {
    /// Creates a new flag whose initial value is `initially_active`.
    pub fn new(initially_active: bool) -> Self {
        let active = Arc::new(AtomicBool::new(initially_active));
        Self { active }
    }

    /// Returns `true` while recording is switched on.
    pub fn is_active(&self) -> bool {
        self.active.load(Ordering::SeqCst)
    }

    /// Switches recording on (`true`) or off (`false`) for all clones.
    pub fn set_active(&self, active: bool) {
        self.active.store(active, Ordering::SeqCst);
    }

    /// Atomically flips the flag and returns the value it holds afterwards.
    pub fn toggle(&self) -> bool {
        // `fetch_xor(true)` inverts the bit in one atomic step and hands back the
        // value from before the flip, so the new value is its negation.
        let was_active = self.active.fetch_xor(true, Ordering::SeqCst);
        !was_active
    }
}
```

## 3. Implement the hotkey service

```rust
// src/hotkey_service.rs - New file

use crate::audio_state::RecordingState;
use anyhow::Result;
use global_hotkey::{GlobalHotKeyManager, HotKeyState, hotkey::HotKey};
use std::str::FromStr;
use tokio::sync::mpsc;
use tracing::{info, error, debug};

pub struct HotkeyService {
    manager: GlobalHotKeyManager,
    recording_state: RecordingState,
}

impl HotkeyService {
    pub fn new(recording_state: RecordingState) -> Result<Self> {
        let manager = GlobalHotKeyManager::new()?;
        Ok(Self { manager, recording_state })
    }
    
    pub fn register_hotkey(&self, hotkey_str: &str) -> Result<()> {
        let hotkey = HotKey::from_str(hotkey_str)?;
        self.manager.register(hotkey)?;
        Ok(())
    }
    
    pub async fn run(&self, mut rx: mpsc::Receiver<HotKeyState>) {
        info!("Hotkey service started");
        
        while let Some(state) = rx.recv().await {
            if state.state() {
                debug!("Hotkey pressed");
                let is_now_active = self.recording_state.toggle();
                info!("Recording {}", if is_now_active { "started" } else { "paused" });
                
                // Here you could trigger visual/audio feedback
            }
        }
        
        info!("Hotkey service stopped");
    }
}
```

## 4. Update the AudioBuffer to use the new recording state

```rust
// Modified AudioBuffer in src/main.rs

/// Sample accumulator that only grows while the shared recording state is active.
#[derive(Debug)]
struct AudioBuffer {
    samples: Vec<f32>,
    recording_state: RecordingState,
}

impl AudioBuffer {
    /// Creates an empty buffer, pre-sized for two chunks, gated by `recording_state`.
    fn new(recording_state: RecordingState) -> Self {
        let samples = Vec::with_capacity(CHUNK_SIZE * 2);
        Self { samples, recording_state }
    }

    /// Appends `samples` to the buffer; silently drops them while recording is paused.
    fn add_samples(&mut self, samples: &[f32]) {
        if !self.recording_state.is_active() {
            return;
        }
        self.samples.extend_from_slice(samples);
    }

    // The stop_recording, start_recording methods can now use the recording_state
}
```

## 5. Add device listing and selection functionality

```rust
// src/audio_utils.rs - New file

use anyhow::{Context, Result};
use cpal::{traits::{DeviceTrait, HostTrait}, Device, Host};
use std::io::{self, Write};
use tracing::info;

/// Prints a numbered menu of the host's input devices and returns them.
///
/// Each returned tuple is `(index, name)`, where `index` is the zero-based
/// position of the entry in the returned list; the menu shows `index + 1`.
/// Devices whose name cannot be queried are skipped *before* numbering, so the
/// numbers on screen are always contiguous and match the positions that
/// `select_audio_device` indexes into.
///
/// # Errors
/// Returns an error if the host cannot enumerate its input devices.
pub fn list_audio_devices(host: &Host) -> Result<Vec<(usize, String)>> {
    let devices = host.input_devices()
        .context("Failed to get input devices")?;

    println!("Available audio input devices:");

    // Filter first, then enumerate: numbering the raw iterator would leave gaps
    // (and shift every later menu number) whenever a device has no readable name.
    let device_list: Vec<(usize, String)> = devices
        .filter_map(|device| device.name().ok())
        .enumerate()
        .collect();

    for (idx, name) in &device_list {
        println!("{}: {}", idx + 1, name);
    }

    Ok(device_list)
}

/// Interactively asks the user which input device to use.
///
/// Lists the devices, reads one line from stdin and resolves it:
/// an empty line (or an empty device list) falls back to the host's default
/// input device; otherwise the 1-based number picks an entry from the list and
/// the device is looked up again by name.
///
/// # Errors
/// Fails if devices cannot be listed, stdin/stdout I/O fails, the input is not
/// a number, the number is out of range, or no matching device is found.
pub fn select_audio_device(host: &Host) -> Result<Device> {
    let choices = list_audio_devices(host)?;
    let count = choices.len();

    if count == 0 {
        return host.default_input_device()
            .context("No audio input devices found");
    }

    print!("Select device number [1-{}] (or press Enter for default): ", count);
    // The prompt has no trailing newline, so flush to make it visible before reading.
    io::stdout().flush()?;

    let mut line = String::new();
    io::stdin().read_line(&mut line)?;

    let answer = line.trim();
    if answer.is_empty() {
        info!("Using default device");
        return host.default_input_device()
            .context("No default input device available");
    }

    let number: usize = answer.parse().context("Invalid selection")?;
    if !(1..=count).contains(&number) {
        return Err(anyhow::anyhow!("Selection out of range"));
    }

    let wanted = &choices[number - 1].1;
    info!("Selected device: {}", wanted);

    // Re-enumerate and take the first device whose name matches the chosen entry.
    host.input_devices()?
        .find(|candidate| matches!(candidate.name(), Ok(name) if &name == wanted))
        .context("Device not found")
}
```

## 6. Modify the command system to support recording toggle

```rust
// Update CpalCommand enum in src/main.rs

/// Commands sent to the dedicated CPAL audio thread over a std channel.
#[derive(Debug)]
enum CpalCommand {
    /// Start capturing; the payload is the device name.
    Start(String),
    /// Stop capturing.
    Stop,
    /// New command: forwarded by the relay actor after it has already toggled the
    /// shared recording state; the CPAL thread only acknowledges it in the log.
    ToggleRecording,
    /// Cleanly terminate the thread.
    Shutdown,
}

// Update AudioCommand enum

/// Commands accepted by the audio command relay actor, which translates each
/// one into the corresponding `CpalCommand`.
#[derive(Debug, Clone)]
enum AudioCommand {
    /// Relayed as `CpalCommand::Start` with the same device name.
    StartRecordingByName(String),
    /// Relayed as `CpalCommand::Stop`.
    StopRecording,
    /// New command: the relay actor toggles the shared recording state and then
    /// relays `CpalCommand::ToggleRecording`.
    ToggleRecording,
}
```

## 7. Update the main function to incorporate CLI args and set up the system

```rust
// Update main() in src/main.rs

/// Application entry point (sketch).
///
/// Parses CLI arguments, initialises tracing, resolves the input device,
/// creates the shared recording state and, in hotkey mode, starts the hotkey
/// service. Sections marked `// ...` are elided here and follow the original code.
///
/// NOTE(review): as shown, the body ends in comments with no tail expression;
/// the elided remainder must end with `Ok(())` (or another `Result`) to compile.
#[tokio::main]
async fn main() -> Result<()> {
    // Parse command line arguments
    let app_config = AppConfig::parse();

    // Set up tracing with debug configuration from app_config
    let tracing_config = my_tracing::TracingConfig {
        debug_mode: app_config.debug,
        data_dir: app_config.data_dir.to_string_lossy().to_string(),
    };
    my_tracing::initialize(tracing_config.clone())?;

    info!("App starting...");
    println!(
        "Audio Transcription Demo | Mode: {:?} | Debug: {} | Data: {}",
        app_config.mode, app_config.debug, app_config.data_dir.display()
    );

    // Initialize the host
    let host = cpal::default_host();

    // Get the audio device - either from CLI args or via selection
    let device = if let Some(device_name) = &app_config.device {
        info!("Using device specified in arguments: {}", device_name);
        // `find_device_by_name_sync` is not defined in this document; presumably an
        // existing helper in src/main.rs — verify its signature.
        find_device_by_name_sync(&host, device_name)
            .context("Failed to find specified device")?
    } else {
        // Present device selection UI
        select_audio_device(&host)
            .context("Failed to select audio device")?
    };

    let device_name = device.name()?;
    info!("Selected audio device: {}", device_name);

    // Initialize recording state based on mode
    // Always-on mode starts recording immediately; hotkey mode starts paused.
    let initially_active = matches!(app_config.mode, TranscriptionMode::AlwaysOn);
    let recording_state = RecordingState::new(initially_active);

    println!("Recording initially {}", if initially_active { "ACTIVE" } else { "PAUSED" });
    if matches!(app_config.mode, TranscriptionMode::Hotkey) {
        println!("Press {} to toggle recording", app_config.hotkey);
    }

    // Initialize the AudioBuffer with the recording state
    // The buffer gets a clone, so it observes the same flag the hotkey service toggles.
    let buffer = Arc::new(StdMutex::new(AudioBuffer::new(recording_state.clone())));

    // Set up channels (similar to original code, but with the new command)
    // ...

    // Set up hotkey service if in hotkey mode
    if matches!(app_config.mode, TranscriptionMode::Hotkey) {
        let hotkey_service = HotkeyService::new(recording_state.clone())?;
        hotkey_service.register_hotkey(&app_config.hotkey)?;

        let (hotkey_tx, hotkey_rx) = mpsc::channel(10);
        // NOTE(review): moving `hotkey_service` into `tokio::spawn` requires it to be
        // `Send + 'static` — confirm `GlobalHotKeyManager` satisfies that on all targets.
        tokio::spawn(async move {
            hotkey_service.run(hotkey_rx).await;
        });

        // Set up event listener to send hotkey events to our channel
        // NOTE(review): `register` is used in `HotkeyService::register_hotkey` as a
        // method on a manager instance taking a `HotKey`; here it is called as an
        // associated function taking the channel sender. Confirm against the
        // global-hotkey API how events are actually obtained and forwarded into
        // `hotkey_tx` — as written, nothing visibly feeds `hotkey_rx`.
        let event_channel = GlobalHotKeyManager::register(hotkey_tx)?;
        tokio::spawn(async move {
            listen_for_hotkey_events(event_channel).await;
        });
    }

    // Spawn audio-related tasks as in original code...
    // ...

    // Rest of the main function follows the original structure
    // ...
}

// Helper function for hotkey event listening
/// Drains hotkey events from `rx` until the channel is closed.
///
/// NOTE(review): this is a placeholder — each received event is currently
/// dropped without being forwarded anywhere; the body still has to be filled in.
async fn listen_for_hotkey_events(mut rx: mpsc::Receiver<GlobalHotKeyEvent>) {
    while let Some(event) = rx.recv().await {
        // Process the event...
    }
}
```

## 8. Update the audio processing components to respect the recording state

```rust
// Update the setup_audio_input_sync function in src/main.rs to use the recording state

/// Builds the input stream on `device` whose callback feeds captured audio into
/// the shared `buffer` and, while recording is active, forwards it to `sender`
/// in `CHUNK_SIZE`-sample chunks.
///
/// The config-selection part is elided in this snippet (`supported_config` comes
/// from it).
///
/// # Errors
/// Returns an error if the input stream cannot be built.
fn setup_audio_input_sync(
    device: &Device,
    sender: FuturesSender<Vec<f32>>,
    buffer: Arc<StdMutex<AudioBuffer>>,
) -> Result<Stream> {
    // Config selection remains the same

    // Stream building with modified callback behavior
    let mut chunk_samples: Vec<f32> = Vec::with_capacity(CHUNK_SIZE * 2);
    let mut sender = sender;

    let stream = device.build_input_stream(
        &supported_config.config(),
        move |data: &[f32], _: &cpal::InputCallbackInfo| {
            // Hold the buffer lock only while touching the buffer. The sends below
            // can block, and blocking with the mutex held would stall every other
            // user of the buffer for as long as the channel is full.
            let is_recording = {
                let mut buffer_guard = match buffer.lock() {
                    Ok(guard) => guard,
                    Err(poisoned) => {
                        error!("Audio buffer mutex poisoned! Recovering.");
                        poisoned.into_inner()
                    }
                };

                // This will only add samples if recording is active
                buffer_guard.add_samples(data);
                buffer_guard.recording_state.is_active()
            };

            // Only process audio when recording is active
            if !is_recording {
                // NOTE(review): a partial chunk left in `chunk_samples` survives a
                // pause and is prepended to the audio captured after resuming —
                // confirm whether it should be cleared or flushed here instead.
                return;
            }

            chunk_samples.extend_from_slice(data);

            while chunk_samples.len() >= CHUNK_SIZE {
                let samples_to_send: Vec<f32> = chunk_samples.drain(..CHUNK_SIZE).collect();
                if let Err(e) = futures::executor::block_on(sender.send(samples_to_send)) {
                    error!("Audio CB: Failed send chunk: {}", e);
                } else {
                    trace!("Audio CB: Sent chunk");
                }
            }
        },
        |err| error!("Audio stream error callback: {}", err),
        None,
    )?;

    Ok(stream)
}
```

## 9. Update the relay actor to handle the new toggle command

```rust
// Update the audio_command_relay_actor function in src/main.rs

/// Relays `AudioCommand`s from the async side to the CPAL thread.
///
/// Each incoming command is translated to its `CpalCommand` counterpart and
/// sent over `cpal_thread_sender`. `ToggleRecording` additionally flips the
/// shared `recording_state` before being relayed. The loop ends when the
/// command channel closes or the CPAL thread can no longer be reached.
async fn audio_command_relay_actor(
    mut command_receiver: tokio_mpsc::Receiver<AudioCommand>,
    cpal_thread_sender: std_mpsc::Sender<CpalCommand>,
    recording_state: RecordingState,
) {
    info!("Audio Command Relay Actor started.");

    while let Some(command) = command_receiver.recv().await {
        debug!("Relay Actor: Received command: {:?}", command);

        let cpal_command = match command {
            AudioCommand::StartRecordingByName(name) => CpalCommand::Start(name),
            AudioCommand::StopRecording => CpalCommand::Stop,
            AudioCommand::ToggleRecording => {
                // Toggle state and propagate to CPAL thread
                let label = if recording_state.toggle() { "resumed" } else { "paused" };
                info!("Recording {}", label);
                CpalCommand::ToggleRecording
            }
        };

        // Send to CPAL thread as before
        match cpal_thread_sender.send(cpal_command) {
            Ok(()) => {}
            Err(e) => {
                error!("Relay Actor: Failed to send command to CPAL thread: {}", e);
                break;
            }
        }
    }

    // Shutdown logic remains the same
}
```

## 10. Finally, update the CPAL thread to handle toggle commands

```rust
// Update the cpal_audio_thread function in src/main.rs

/// Body of the dedicated CPAL audio thread (skeleton).
///
/// Blocks on `command_receiver` and dispatches each `CpalCommand`. Only the new
/// `ToggleRecording` arm is spelled out; the `Start`/`Stop` arms, the shutdown
/// logic and the receive-error handling are elided and follow the existing code.
///
/// NOTE(review): as shown, the `match` omits `Start`/`Stop` and the loop has no
/// exit path — both are expected to come from the elided existing handlers.
fn cpal_audio_thread(
    command_receiver: std_mpsc::Receiver<CpalCommand>,
    audio_chunk_sender: FuturesSender<Vec<f32>>,
    buffer: Arc<StdMutex<AudioBuffer>>,
) {
    info!("Dedicated CPAL audio thread started.");
    // Existing initialization code

    loop {
        // Blocking receive: the thread waits here until the next command arrives.
        match command_receiver.recv() {
            Ok(command) => {
                debug!("CPAL Thread: Received command: {:?}", command);
                match command {
                    // Existing Start and Stop handlers

                    CpalCommand::ToggleRecording => {
                        // The buffer now uses the shared recording state, which is 
                        // already toggled by the relay actor, so we don't need 
                        // to do anything else here except acknowledge
                        debug!("CPAL Thread: Recording toggle processed");
                    },

                    CpalCommand::Shutdown => {
                        // Existing shutdown logic
                    }
                }
            },
            Err(e) => {
                // Existing error handling
            }
        }
    }
}
```

## Dependencies to Add to Cargo.toml

```toml
[dependencies]
# Existing dependencies...

# New dependencies
clap = { version = "4.3", features = ["derive"] }
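# NOTE(review): src/hotkey_service.rs imports `global_hotkey::HotKeyState`; confirm
# the pinned release exports it (a newer release than 0.3 may be required).
global-hotkey = "0.3"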
```

This modular approach separates concerns cleanly and provides a solid foundation for extending the application further. The code structure is now more maintainable with better separation between configuration, state management, device handling, and input processing.