1use ffmpeg_sidecar::{command::FfmpegCommand, event::FfmpegEvent};
5use std::env;
6use std::fs;
7use std::io::{self, Write};
8use std::path::Path;
9use std::process::Command;
10use std::time::{Duration, Instant};
11
12fn main() -> anyhow::Result<()> {
13 let _guard = temporarily_use_ffmpeg_from_system_path()?;
14
15 download_whisper_model()?;
17
18 let audio_device = find_default_audio_device()?;
20 println!("Listening to audio device: {}", audio_device);
21 println!("Starting real-time transcription... (Say 'stop recording' or press Ctrl+C to stop)");
22
23 let whisper_filter = "whisper=model=./whisper.cpp/models/ggml-base.en.bin:destination=-:queue=2";
27
28 let mut command = FfmpegCommand::new();
29
30 if cfg!(windows) {
32 command
33 .format("dshow")
34 .args("-audio_buffer_size 50".split(' ')) .input(format!("audio={}", audio_device));
36 } else {
37 command
39 .format("pulse") .input("default");
41 }
42
43 let iter = command
44 .arg("-af")
45 .arg(&whisper_filter)
46 .format("null")
47 .output("-")
48 .spawn()?
49 .iter()?;
50
51 let mut transcription_parts = Vec::new();
52 let mut last_transcription_time = Instant::now();
53 let pause_threshold = Duration::from_secs(2); for event in iter {
56 match event {
57 FfmpegEvent::ParsedConfiguration(config) => {
58 if !config
59 .configuration
60 .contains(&"--enable-whisper".to_string())
61 {
62 anyhow::bail!("FFmpeg was not built with Whisper support (--enable-whisper)");
63 }
64 }
65 FfmpegEvent::OutputChunk(chunk) => {
66 if let Ok(text) = String::from_utf8(chunk) {
68 let trimmed = text.trim();
69 if !trimmed.is_empty() {
70 let now = Instant::now();
71
72 if now.duration_since(last_transcription_time) > pause_threshold
74 && !transcription_parts.is_empty()
75 {
76 println!(); transcription_parts.clear();
79 }
80
81 let test_text = format!("{} {}", transcription_parts.join(" "), trimmed).to_lowercase();
83 if test_text.contains("stop recording") {
84 print!("{} {}", transcription_parts.join(" "), trimmed);
85 println!("\nStop command detected. Ending transcription session.");
86 break;
87 }
88
89 transcription_parts.push(trimmed.to_string());
90
91 print!(" {}", trimmed);
93 io::stdout().flush().unwrap();
94
95 last_transcription_time = now;
96 }
97 }
98 }
99 FfmpegEvent::Done => {
100 println!("\nTranscription complete!");
101 break;
102 }
103 _ => {}
104 }
105 }
106
107 Ok(())
108}
109
110fn find_default_audio_device() -> anyhow::Result<String> {
111 if cfg!(windows) {
112 let audio_device = FfmpegCommand::new()
114 .hide_banner()
115 .args(["-list_devices", "true"])
116 .format("dshow")
117 .input("dummy")
118 .spawn()?
119 .iter()?
120 .into_ffmpeg_stderr()
121 .find(|line| line.contains("(audio)"))
122 .and_then(|line| line.split('\"').nth(1).map(|s| s.to_string()))
123 .ok_or_else(|| anyhow::anyhow!("No audio device found on Windows"))?;
124
125 Ok(audio_device)
126 } else {
127 println!("Note: Using default audio device. On Linux/Mac, you may need to adjust audio format and device.");
129 Ok("default".to_string())
130 }
131}
132
133fn download_whisper_model() -> anyhow::Result<()> {
134 let model_path = Path::new("whisper.cpp/models/ggml-base.en.bin");
135
136 if model_path.exists() {
138 println!("Whisper model already exists at {}", model_path.display());
139 return Ok(());
140 }
141
142 println!("Downloading whisper.cpp and base.en model...");
143
144 if !Path::new("whisper.cpp").exists() {
146 println!("Cloning whisper.cpp repository...");
147 let output = Command::new("git")
148 .args(&["clone", "https://github.com/ggml-org/whisper.cpp.git"])
149 .output()?;
150
151 if !output.status.success() {
152 anyhow::bail!(
153 "Failed to clone whisper.cpp: {}",
154 String::from_utf8_lossy(&output.stderr)
155 );
156 }
157 }
158
159 println!("Downloading base.en model...");
161 let output = Command::new("sh")
162 .args(&["./models/download-ggml-model.sh", "base.en"])
163 .current_dir("whisper.cpp")
164 .output()?;
165
166 if !output.status.success() {
167 anyhow::bail!(
168 "Failed to download model: {}",
169 String::from_utf8_lossy(&output.stderr)
170 );
171 }
172
173 println!(
174 "Successfully downloaded whisper model to {}",
175 model_path.display()
176 );
177 Ok(())
178}
179
180fn temporarily_use_ffmpeg_from_system_path() -> anyhow::Result<RestoreGuard> {
184 let exe_dir = env::current_exe()?.parent().unwrap().to_path_buf();
186
187 let ffmpeg_names = ["ffmpeg", "ffmpeg.exe"];
189 let mut renamed_paths = Vec::new();
190
191 for name in &ffmpeg_names {
193 let ffmpeg_path = exe_dir.join(name);
194 if ffmpeg_path.exists() {
195 let backup_path = exe_dir.join(format!("{}.backup", name));
196 fs::rename(&ffmpeg_path, &backup_path)?;
197 println!(
198 "Temporarily renamed {} to {}",
199 ffmpeg_path.display(),
200 backup_path.display()
201 );
202 renamed_paths.push((ffmpeg_path, backup_path));
203 }
204 }
205
206 Ok(RestoreGuard { renamed_paths })
207}
208
/// Undoes a set of file renames when it goes out of scope.
///
/// Each entry in `renamed_paths` is an `(original, backup)` pair; on drop,
/// every `backup` is renamed back to its `original`.
#[must_use = "the files are restored as soon as the guard is dropped; bind it to a variable"]
struct RestoreGuard {
    /// `(original, backup)` path pairs to rename back on drop.
    renamed_paths: Vec<(std::path::PathBuf, std::path::PathBuf)>,
}

impl Drop for RestoreGuard {
    /// Renames every backup back to its original path.
    ///
    /// A failed rename is reported on stderr and does not stop the remaining
    /// pairs from being restored; `drop` cannot return an error.
    fn drop(&mut self) {
        for (original, backup) in &self.renamed_paths {
            match fs::rename(backup, original) {
                Ok(()) => println!("Restored {}", original.display()),
                Err(e) => eprintln!("Failed to restore {}: {}", original.display(), e),
            }
        }
    }
}