mod sense_voice;
use crate::asr::sense_voice::SenseVoiceConfig;
use crate::error::*;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::{Receiver, UnboundedSender};
use tracing::debug;
/// Configuration for [`AutomaticSpeechRecognition`].
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct AutomaticSpeechRecognitionConfig {
    /// Settings handed to the SenseVoice backend when it is initialized.
    pub sense_voice_config: SenseVoiceConfig,
}
/// Speech-to-text stage: receives speech buffers on one channel, runs them
/// through the SenseVoice backend, and forwards the recognized text on another.
pub struct AutomaticSpeechRecognition {
    /// Backend used to run inference on each received buffer.
    sense_voice: sense_voice::SenseVoice,
    /// Source of speech buffers (`Vec<f32>`; presumably audio samples — the
    /// expected sample format is not visible from this file).
    speech_receiver: Receiver<Vec<f32>>,
    /// Destination for the text produced by inference.
    text_sender: UnboundedSender<String>,
}
impl AutomaticSpeechRecognition {
    /// Builds a recognizer from `config` and the two channel endpoints.
    ///
    /// The SenseVoice backend is initialized eagerly from
    /// `config.sense_voice_config`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `SenseVoice::init`.
    pub fn init(
        config: AutomaticSpeechRecognitionConfig,
        speech_receiver: Receiver<Vec<f32>>,
        text_sender: UnboundedSender<String>,
    ) -> Result<Self> {
        debug!(
            "Initializing automatic speech recognition with config {:?}",
            &config
        );
        // `config` is owned and not used past this point, so the backend
        // configuration is moved out of it rather than cloned.
        let sense_voice = sense_voice::SenseVoice::init(config.sense_voice_config)?;
        Ok(Self {
            sense_voice,
            speech_receiver,
            text_sender,
        })
    }

    /// Processes speech buffers until the speech channel yields `None`.
    ///
    /// Each received buffer is passed to the backend's `infer`, and the
    /// resulting text is sent on `text_sender`. Returns `Ok(())` once
    /// `recv()` returns `None` (for a tokio mpsc receiver: channel closed and
    /// drained).
    ///
    /// # Errors
    ///
    /// Returns early, ending the loop, if inference fails or if sending the
    /// text fails; both errors are converted through `?` into the crate's
    /// error type.
    pub async fn run(&mut self) -> Result<()> {
        while let Some(speech) = self.speech_receiver.recv().await {
            debug!("Received speech, len: {}", speech.len());
            // NOTE(review): `infer` is called synchronously inside an async fn;
            // if it is CPU-heavy it will occupy the executor thread for the
            // duration of the call — confirm this is acceptable for the runtime.
            let asr_result = self.sense_voice.infer(speech)?;
            debug!("Asr result: {:?}", asr_result);
            self.text_sender.send(asr_result)?;
        }
        Ok(())
    }
}