mod real_audio;
use anyhow::Result;
use std::sync::{Arc, Mutex};
use tauri::{AppHandle, Manager};
use tracing::{info, error};
use real_audio::RealAudioRecorder;
use crate::ai::{SpeechProcessor, SileroVAD, WhisperSTT};
pub struct AudioRecorder {
app_handle: AppHandle,
is_recording: Arc<Mutex<bool>>,
real_recorder: Option<RealAudioRecorder>,
sample_rate: u32,
speech_processor: Arc<Mutex<SpeechProcessor>>,
vad: Option<SileroVAD>,
whisper: Option<Arc<WhisperSTT>>,
}
impl AudioRecorder {
pub fn new(app_handle: AppHandle) -> Self {
let real_recorder = match RealAudioRecorder::new() {
Ok(recorder) => {
info!("Real audio recorder initialized successfully");
Some(recorder)
}
Err(e) => {
error!("Failed to initialize real audio recorder: {}. Falling back to simulation.", e);
None
}
};
let vad = match SileroVAD::new() {
Ok(vad) => {
info!("Silero VAD initialized successfully");
Some(vad)
}
Err(e) => {
error!("Failed to initialize VAD: {}", e);
None
}
};
let mut speech_processor = SpeechProcessor::new();
speech_processor.set_sample_rate(16000);
Self {
app_handle,
is_recording: Arc::new(Mutex::new(false)),
real_recorder,
sample_rate: 16000,
speech_processor: Arc::new(Mutex::new(speech_processor)),
vad,
whisper: None,
}
}
pub async fn start_recording(&self) -> Result<()> {
{
let mut is_recording = self.is_recording.lock().unwrap();
if *is_recording {
return Err(anyhow::anyhow!("Already recording"));
}
*is_recording = true;
}
info!("Starting audio recording");
let _ = self.app_handle.emit_all("recording-started", ());
if let Some(ref recorder) = self.real_recorder {
info!("Using real microphone input");
recorder.start_recording()?;
} else {
info!("Using simulated audio (real audio not available)");
let is_recording = Arc::clone(&self.is_recording);
tokio::spawn(async move {
while *is_recording.lock().unwrap() {
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
}
});
}
Ok(())
}
pub async fn stop_recording(&self) -> Result<Vec<f32>> {
{
let mut is_recording = self.is_recording.lock().unwrap();
if !*is_recording {
return Err(anyhow::anyhow!("Not recording"));
}
*is_recording = false;
}
let audio_data = if let Some(ref recorder) = self.real_recorder {
recorder.stop_recording()?
} else {
vec![]
};
let _ = self.app_handle.emit_all("recording-stopped", audio_data.len());
info!("Recorded {} samples ({:.2} seconds)", audio_data.len(), audio_data.len() as f32 / self.sample_rate as f32);
Ok(audio_data)
}
pub fn is_recording(&self) -> bool {
*self.is_recording.lock().unwrap()
}
pub async fn save_to_wav(&self, audio_data: &[f32], path: &str) -> Result<()> {
let spec = hound::WavSpec {
channels: 1,
sample_rate: self.sample_rate,
bits_per_sample: 16,
sample_format: hound::SampleFormat::Int,
};
let mut writer = hound::WavWriter::create(path, spec)?;
for &sample in audio_data {
let amplitude = (sample * i16::MAX as f32) as i16;
writer.write_sample(amplitude)?;
}
writer.finalize()?;
info!("Saved audio to {}", path);
Ok(())
}
pub async fn process_audio_stream(&self) -> Result<()> {
if !self.is_recording() {
return Err(anyhow::anyhow!("Not recording"));
}
let audio_chunk = if let Some(ref recorder) = self.real_recorder {
recorder.get_current_buffer()?
} else {
vec![]
};
if audio_chunk.is_empty() {
return Ok(());
}
info!("Processing audio chunk with {} samples", audio_chunk.len());
let has_speech = if let Some(ref vad) = self.vad {
vad.detect_speech(&audio_chunk)?
} else {
audio_chunk.iter().any(|&x| x.abs() > 0.1)
};
let should_transcribe = {
let mut processor = self.speech_processor.lock().unwrap();
processor.process_audio_chunk_sync(&audio_chunk, has_speech, self.sample_rate)
};
if should_transcribe {
let audio_buffer = {
let mut processor = self.speech_processor.lock().unwrap();
processor.get_and_clear_audio_buffer()
};
if !audio_buffer.is_empty() {
let text = if let Some(ref whisper) = self.whisper {
match whisper.transcribe(&audio_buffer, self.sample_rate).await {
Ok(text) if !text.is_empty() => {
info!("Transcribed: {}", text);
Some(text)
}
_ => None,
}
} else {
Some("[Whisper not loaded - please wait]".to_string())
};
if let Some(text) = text {
let _ = self.app_handle.emit_all("speech-transcribed", text);
}
}
}
{
let processor = self.speech_processor.lock().unwrap();
if processor.should_interrupt_assistant() {
let _ = self.app_handle.emit_all("interrupt-assistant", ());
}
}
Ok(())
}
pub fn get_speech_processor(&self) -> Arc<Mutex<SpeechProcessor>> {
Arc::clone(&self.speech_processor)
}
pub async fn initialize_whisper(&mut self) -> Result<()> {
info!("Initializing Whisper speech-to-text...");
let mut whisper = WhisperSTT::new(self.app_handle.clone())?;
whisper.initialize().await?;
let whisper_arc = Arc::new(whisper);
self.whisper = Some(whisper_arc.clone());
let mut processor = self.speech_processor.lock().unwrap();
processor.set_whisper(whisper_arc);
info!("Whisper initialized successfully");
Ok(())
}
}