use ffmpeg_sidecar::{command::FfmpegCommand, event::FfmpegEvent};
use std::env;
use std::fs;
use std::io::{self, Write};
use std::path::Path;
use std::process::Command;
use std::time::{Duration, Instant};
fn main() -> anyhow::Result<()> {
let _guard = temporarily_use_ffmpeg_from_system_path()?;
download_whisper_model()?;
let audio_device = find_default_audio_device()?;
println!("Listening to audio device: {}", audio_device);
println!("Starting real-time transcription... (Say 'stop recording' or press Ctrl+C to stop)");
let whisper_filter = "whisper=model=./whisper.cpp/models/ggml-base.en.bin:destination=-:queue=2";
let mut command = FfmpegCommand::new();
if cfg!(windows) {
command
.format("dshow")
.args("-audio_buffer_size 50".split(' ')) .input(format!("audio={}", audio_device));
} else {
command
.format("pulse") .input("default");
}
let iter = command
.arg("-af")
.arg(&whisper_filter)
.format("null")
.output("-")
.spawn()?
.iter()?;
let mut transcription_parts = Vec::new();
let mut last_transcription_time = Instant::now();
let pause_threshold = Duration::from_secs(2);
for event in iter {
match event {
FfmpegEvent::ParsedConfiguration(config) => {
if !config
.configuration
.contains(&"--enable-whisper".to_string())
{
anyhow::bail!("FFmpeg was not built with Whisper support (--enable-whisper)");
}
}
FfmpegEvent::OutputChunk(chunk) => {
if let Ok(text) = String::from_utf8(chunk) {
let trimmed = text.trim();
if !trimmed.is_empty() {
let now = Instant::now();
if now.duration_since(last_transcription_time) > pause_threshold
&& !transcription_parts.is_empty()
{
println!(); transcription_parts.clear();
}
let test_text = format!("{} {}", transcription_parts.join(" "), trimmed).to_lowercase();
if test_text.contains("stop recording") {
print!("{} {}", transcription_parts.join(" "), trimmed);
println!("\nStop command detected. Ending transcription session.");
break;
}
transcription_parts.push(trimmed.to_string());
print!(" {}", trimmed);
io::stdout().flush().unwrap();
last_transcription_time = now;
}
}
}
FfmpegEvent::Done => {
println!("\nTranscription complete!");
break;
}
_ => {}
}
}
Ok(())
}
/// Picks the audio input device name to record from.
///
/// On Windows this runs FFmpeg with `-list_devices true` on the `dshow`
/// format, takes the first stderr line containing `(audio)`, and returns the
/// text between its first pair of double quotes. On every other platform it
/// prints a note and returns the literal name `"default"`.
///
/// # Errors
/// Fails if FFmpeg cannot be spawned, or (Windows only) if no stderr line
/// yields a quoted audio device name.
fn find_default_audio_device() -> anyhow::Result<String> {
    if !cfg!(windows) {
        println!("Note: Using default audio device. On Linux/Mac, you may need to adjust audio format and device.");
        return Ok("default".to_string());
    }

    // First stderr line that describes an audio device, if any.
    let device_line = FfmpegCommand::new()
        .hide_banner()
        .args(["-list_devices", "true"])
        .format("dshow")
        .input("dummy")
        .spawn()?
        .iter()?
        .into_ffmpeg_stderr()
        .find(|stderr_line| stderr_line.contains("(audio)"));

    // The device name is the first double-quoted segment of that line.
    let quoted_name = device_line
        .as_deref()
        .and_then(|stderr_line| stderr_line.split('"').nth(1));

    match quoted_name {
        Some(device_name) => Ok(device_name.to_owned()),
        None => Err(anyhow::anyhow!("No audio device found on Windows")),
    }
}
/// Ensures the `base.en` whisper model exists at
/// `whisper.cpp/models/ggml-base.en.bin`, relative to the current directory.
///
/// If the model file is already present nothing is done. Otherwise the
/// `whisper.cpp` repository is cloned with `git` (unless a `whisper.cpp`
/// directory already exists) and its `models/download-ggml-model.sh` script
/// is run through `sh` to fetch the model.
///
/// # Errors
/// Fails if `git` or `sh` cannot be started, if either command exits with a
/// non-zero status (its stderr is included in the error), or if the model
/// file is still missing after the script reported success.
fn download_whisper_model() -> anyhow::Result<()> {
    let model_path = Path::new("whisper.cpp/models/ggml-base.en.bin");
    if model_path.exists() {
        println!("Whisper model already exists at {}", model_path.display());
        return Ok(());
    }

    println!("Downloading whisper.cpp and base.en model...");

    if !Path::new("whisper.cpp").exists() {
        println!("Cloning whisper.cpp repository...");
        let output = Command::new("git")
            .args(["clone", "https://github.com/ggml-org/whisper.cpp.git"])
            .output()
            // A bare "No such file or directory" does not say which tool is missing.
            .map_err(|e| anyhow::anyhow!("Failed to run `git` (is it installed?): {}", e))?;
        if !output.status.success() {
            anyhow::bail!(
                "Failed to clone whisper.cpp: {}",
                String::from_utf8_lossy(&output.stderr)
            );
        }
    }

    println!("Downloading base.en model...");
    let output = Command::new("sh")
        .args(["./models/download-ggml-model.sh", "base.en"])
        .current_dir("whisper.cpp")
        .output()
        .map_err(|e| anyhow::anyhow!("Failed to run `sh` (is it installed?): {}", e))?;
    if !output.status.success() {
        anyhow::bail!(
            "Failed to download model: {}",
            String::from_utf8_lossy(&output.stderr)
        );
    }

    // Do not claim success on the exit status alone: confirm the file that
    // the rest of the program will load is really there.
    if !model_path.exists() {
        anyhow::bail!(
            "Model download reported success but {} does not exist",
            model_path.display()
        );
    }

    println!(
        "Successfully downloaded whisper model to {}",
        model_path.display()
    );
    Ok(())
}
/// Renames any `ffmpeg` / `ffmpeg.exe` found next to the current executable
/// to `<name>.backup` and returns a guard that renames them back when dropped.
///
/// Presumably this makes FFmpeg resolution fall through to the copy on the
/// system `PATH`, as the function name suggests — confirm against how the
/// FFmpeg binary is located.
///
/// A `<name>.backup` file with no matching original (for example after a run
/// that was interrupted before the guard could be dropped) is adopted by the
/// guard so that it is restored at the end of this run.
///
/// # Errors
/// Fails if the executable path cannot be determined, has no parent
/// directory, or a rename fails. Files already renamed during this call are
/// restored before the error is returned, because the guard is dropped.
fn temporarily_use_ffmpeg_from_system_path() -> anyhow::Result<RestoreGuard> {
    let exe_path = env::current_exe()?;
    let exe_dir = exe_path.parent().ok_or_else(|| {
        anyhow::anyhow!(
            "Executable path {} has no parent directory",
            exe_path.display()
        )
    })?;

    // Build the guard first so an early `?` return below still undoes the
    // renames that have already happened.
    let mut guard = RestoreGuard {
        renamed_paths: Vec::new(),
    };

    for name in ["ffmpeg", "ffmpeg.exe"] {
        let ffmpeg_path = exe_dir.join(name);
        let backup_path = exe_dir.join(format!("{}.backup", name));

        if ffmpeg_path.exists() {
            fs::rename(&ffmpeg_path, &backup_path)?;
            println!(
                "Temporarily renamed {} to {}",
                ffmpeg_path.display(),
                backup_path.display()
            );
            guard.renamed_paths.push((ffmpeg_path, backup_path));
        } else if backup_path.exists() {
            // Leftover from a run that ended without restoring the file.
            println!(
                "Found leftover backup {}; it will be restored on exit",
                backup_path.display()
            );
            guard.renamed_paths.push((ffmpeg_path, backup_path));
        }
    }

    Ok(guard)
}
/// Drop guard that undoes a set of file renames.
///
/// When the guard goes out of scope, every recorded backup file is renamed
/// back to its original path.
struct RestoreGuard {
    /// `(original, backup)` pairs: on drop, `backup` is renamed to `original`.
    renamed_paths: Vec<(std::path::PathBuf, std::path::PathBuf)>,
}

impl Drop for RestoreGuard {
    /// Renames each backup back to its original location, in recorded order.
    ///
    /// A failed rename is reported on stderr and does not prevent the
    /// remaining entries from being processed.
    fn drop(&mut self) {
        self.renamed_paths.iter().for_each(|(target, stashed)| {
            match fs::rename(stashed, target) {
                Ok(()) => println!("Restored {}", target.display()),
                Err(err) => eprintln!("Failed to restore {}: {}", target.display(), err),
            }
        });
    }
}