1use std::path::PathBuf;
2
3use clap::{Args, Parser, Subcommand, ValueEnum, ValueHint};
4
5pub fn long_version() -> &'static str {
6 concat!(
7 env!("CARGO_PKG_VERSION"),
8 " (",
9 env!("GIT_SHA"),
10 " ",
11 env!("BUILD_DATE"),
12 " ",
13 env!("TARGET"),
14 ")"
15 )
16}
17
// Top-level command-line interface for `whisper-macos-cli`, parsed with clap's
// derive API.
//
// NOTE: plain `//` comments are used here on purpose. clap turns `///` doc
// comments on derived types and their fields into user-visible help text, so
// adding them would change the `--help` output.
//
// Command-level settings below:
// - `version` / `long_version`: short version from Cargo metadata, long version
//   from `long_version()`; `propagate_version` makes subcommands report it too.
// - `arg_required_else_help`: an invocation without any arguments shows help.
// - `max_term_width = 100`: help text is wrapped at no more than 100 columns.
// - `after_help`: static text appended after the generated help (examples,
//   environment variables, exit statuses, files, links). It is runtime text;
//   its content and whitespace must be edited deliberately.
#[derive(Debug, Parser)]
#[command(
    name = "whisper-macos-cli",
    version,
    long_version = long_version(),
    propagate_version = true,
    arg_required_else_help = true,
    max_term_width = 100,
    after_help = "\
EXAMPLES:
 whisper-macos-cli transcribe voice.ogg
 whisper-macos-cli transcribe --model base --language pt audio.mp3
 whisper-macos-cli transcribe --timestamps --ndjson *.ogg
 cat audio.wav | whisper-macos-cli transcribe
 whisper-macos-cli models download base
 whisper-macos-cli doctor
 whisper-macos-cli commands --format json

ENVIRONMENT:
 WHISPER_MODEL Override default model (e.g. base, small, medium)
 WHISPER_LANGUAGE Override default language (e.g. pt, en, es, auto)
 NO_COLOR Disable colored output (see https://no-color.org)
 CI Disable all interactive prompts when set to true
 RUST_LOG Override tracing log level filter
 SOURCE_DATE_EPOCH Unix timestamp for reproducible builds

EXIT STATUS:
 0 Success
 2 Usage error (invalid arguments)
 64 No input provided
 65 Invalid input data (corrupt audio, unsupported format)
 66 Input file not found
 69 Service unavailable (download failed, unsupported platform)
 70 Internal error (whisper inference failed)
 74 I/O error
 78 Configuration error (model not found)
 130 Interrupted (SIGINT / Ctrl+C)
 141 Broken pipe (SIGPIPE)
 143 Terminated (SIGTERM)

FILES:
 ~/Library/Application Support/whisper-macos-cli/models/
 Downloaded Whisper model files (ggml-*.bin)

SEE ALSO:
 Project: https://github.com/daniloaguiarbr/whisper-macos-cli
 whisper.cpp: https://github.com/ggml-org/whisper.cpp

BUGS:
 Report bugs at https://github.com/daniloaguiarbr/whisper-macos-cli/issues"
)]
pub struct Cli {
    // Selected subcommand; `None` when the invocation named no subcommand.
    #[command(subcommand)]
    pub command: Option<Commands>,

    // `--quiet`: global boolean flag that can also be supplied through the
    // `QUIET` environment variable.
    // NOTE(review): presumably silences non-essential output — confirm at the use site.
    #[arg(long, global = true, env = "QUIET")]
    pub quiet: bool,

    // `-v` / `--verbose`: global flag counted per occurrence (`-vv` yields 2).
    #[arg(short, long, global = true, action = clap::ArgAction::Count)]
    pub verbose: u8,

    // `--print-schema`: global boolean flag.
    // NOTE(review): its effect is implemented elsewhere; a `schema` subcommand
    // also exists in `Commands` — confirm how the two relate.
    #[arg(long, global = true)]
    pub print_schema: bool,

    // `--print-config`: global boolean flag.
    // NOTE(review): its effect is implemented elsewhere; a `config` subcommand
    // also exists in `Commands` — confirm how the two relate.
    #[arg(long, global = true)]
    pub print_config: bool,

    // `--no-input`: global boolean flag that can also be supplied through the
    // `NO_INPUT` environment variable.
    // NOTE(review): presumably disables interactive prompts — confirm at the use site.
    #[arg(long, global = true, env = "NO_INPUT")]
    pub no_input: bool,

    // `--color <WHEN>`: global option taking a `ColorChoice`; defaults to `auto`.
    #[arg(long, global = true, value_name = "WHEN", default_value = "auto")]
    pub color: ColorChoice,
}
101
// Accepted values for the global `--color <WHEN>` option (`Cli::color`).
// (`//` comments on purpose: with `ValueEnum`, `///` doc comments on variants
// become help text for the possible values.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum ColorChoice {
    // Default for `--color` (`default_value = "auto"` on `Cli::color`).
    // NOTE(review): presumably decides based on the terminal — confirm at the use site.
    Auto,
    // NOTE(review): presumably forces colored output — confirm at the use site.
    Always,
    // NOTE(review): presumably disables colored output — confirm at the use site.
    Never,
}
108
// Top-level subcommands of the CLI (the `command` field of `Cli`).
// (`//` comments on purpose: with `Subcommand`, `///` doc comments on variants
// and fields become user-visible help text.)
#[derive(Debug, Subcommand)]
pub enum Commands {
    // `transcribe`: accepts the options declared in `TranscribeArgs`.
    Transcribe(TranscribeArgs),
    // `models`: requires one nested `ModelsAction` subcommand.
    Models {
        #[command(subcommand)]
        action: ModelsAction,
    },
    // `doctor`: takes no arguments.
    // NOTE(review): presumably runs environment diagnostics — confirm in the handler.
    Doctor,
    // `schema`: takes no arguments.
    Schema,
    // `config`: takes no arguments.
    Config,
    // `completions <SHELL>`: required positional naming a `clap_complete::Shell`.
    // NOTE(review): presumably prints a completion script for that shell — confirm in the handler.
    Completions {
        #[arg(value_name = "SHELL")]
        shell: clap_complete::Shell,
    },
    // `commands [--format <FMT>]`: `--format` takes a `CommandsFormat` and
    // defaults to `json`.
    Commands {
        #[arg(long, value_name = "FMT", default_value = "json")]
        format: CommandsFormat,
    },
    // `init [--target <DIR>]`: `--target` is a path that defaults to `.`.
    Init {
        #[arg(long, value_name = "DIR", default_value = ".")]
        target: PathBuf,
    },
    // `licenses`: takes no arguments.
    Licenses,
    // `resume <WORKFLOW_ID>`: required positional, kept as a free-form string.
    Resume {
        #[arg(value_name = "WORKFLOW_ID")]
        workflow_id: String,
    },
}
151
// Accepted values for `commands --format <FMT>` (see `Commands::Commands`,
// where `json` is the default).
// (`//` comments on purpose: with `ValueEnum`, `///` doc comments on variants
// become help text for the possible values.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum CommandsFormat {
    Json,
    Yaml,
}
157
// Arguments of the `transcribe` subcommand (`Commands::Transcribe`).
//
// NOTE: plain `//` comments are used on purpose. With clap's derive API,
// `///` doc comments on these fields would become user-visible help text.
//
// Options are grouped in help output through `help_heading`:
// "Transcription", "Output", "Input" and "Execution".
#[derive(Debug, Args)]
pub struct TranscribeArgs {
    // Positional input paths; zero or more may be given. The value hint only
    // affects shell completion.
    // NOTE(review): the help examples pipe audio on stdin with no file argument,
    // so an empty list presumably means "read stdin" — confirm in the handler.
    #[arg(value_hint = ValueHint::FilePath)]
    pub files: Vec<PathBuf>,

    // `-l` / `--language <LANG>`: optional free-form string, also read from
    // `WHISPER_LANGUAGE`. No validation happens at parse time.
    #[arg(
        short,
        long,
        value_name = "LANG",
        env = "WHISPER_LANGUAGE",
        help_heading = "Transcription"
    )]
    pub language: Option<String>,

    // `-m` / `--model <MODEL>`: a `WhisperModel`, also read from
    // `WHISPER_MODEL`; defaults to `large-v3`.
    #[arg(
        short,
        long,
        value_name = "MODEL",
        env = "WHISPER_MODEL",
        default_value = "large-v3",
        help_heading = "Transcription"
    )]
    pub model: WhisperModel,

    // `--beam-size <N>`: defaults to 8; `parse_beam_size` accepts 1..=16.
    #[arg(long, value_name = "N", default_value_t = 8, value_parser = parse_beam_size, help_heading = "Transcription")]
    pub beam_size: i32,

    // `--timestamps`: boolean flag.
    // NOTE(review): presumably adds timing information to the output — confirm in the handler.
    #[arg(long, help_heading = "Output")]
    pub timestamps: bool,

    // `--ndjson`: boolean flag; clap rejects it when `--output-format` is also
    // given. See `is_ndjson` for the combined check.
    #[arg(long, help_heading = "Output", conflicts_with = "output_format")]
    pub ndjson: bool,

    // `--output-format <FMT>`: optional `OutputFormat`.
    #[arg(long, value_name = "FMT", help_heading = "Output")]
    pub output_format: Option<OutputFormat>,

    // `--vad-threshold <FLOAT>`: defaults to 0.5; `parse_vad_threshold`
    // accepts 0.0..=1.0.
    #[arg(long, value_name = "FLOAT", default_value_t = 0.5, value_parser = parse_vad_threshold, help_heading = "Transcription")]
    pub vad_threshold: f32,

    // `--concurrency <N>`: defaults to 2; `parse_concurrency` accepts 1..=32.
    #[arg(long, value_name = "N", default_value_t = 2, value_parser = parse_concurrency, help_heading = "Transcription")]
    pub concurrency: usize,

    // `--input-format <FMT>`: optional free-form string, not validated at
    // parse time.
    #[arg(long, value_name = "FMT", help_heading = "Input")]
    pub input_format: Option<String>,

    // `--ffmpeg-binary <PATH>`: also read from `WHISPER_FFMPEG_BINARY`;
    // defaults to `ffmpeg`.
    #[arg(
        long,
        value_name = "PATH",
        env = "WHISPER_FFMPEG_BINARY",
        default_value = "ffmpeg",
        help_heading = "Input"
    )]
    pub ffmpeg_binary: String,

    // `--no-ffmpeg-fallback`: boolean flag, also read from
    // `WHISPER_NO_FFMPEG_FALLBACK`.
    #[arg(long, env = "WHISPER_NO_FFMPEG_FALLBACK", help_heading = "Input")]
    pub no_ffmpeg_fallback: bool,

    // `--dry-run`: boolean flag.
    #[arg(long, help_heading = "Execution")]
    pub dry_run: bool,

    // `--timeout <SECS>`: optional; `parse_timeout_secs` accepts 1..=3600.
    #[arg(long, value_name = "SECS", value_parser = parse_timeout_secs, help_heading = "Execution")]
    pub timeout: Option<u64>,

    // `--retry-count <N>`: optional; `parse_retry_count` accepts 0..=10.
    #[arg(long, value_name = "N", value_parser = parse_retry_count, help_heading = "Execution")]
    pub retry_count: Option<u32>,

    // `--retry-max-elapsed <SECS>`: optional; `parse_retry_elapsed` accepts
    // 1..=3600.
    #[arg(long, value_name = "SECS", value_parser = parse_retry_elapsed, help_heading = "Execution")]
    pub retry_max_elapsed: Option<u64>,

    // `--offline`: boolean flag.
    // NOTE(review): presumably forbids network access such as model downloads — confirm in the handler.
    #[arg(long, help_heading = "Execution")]
    pub offline: bool,

    // `--resume <WORKFLOW_ID>`: optional free-form string.
    #[arg(long, value_name = "WORKFLOW_ID", help_heading = "Execution")]
    pub resume: Option<String>,
}
251
252impl TranscribeArgs {
253 pub fn is_ndjson(&self) -> bool {
254 self.ndjson || matches!(self.output_format, Some(OutputFormat::Ndjson))
255 }
256}
257
// Accepted values for `transcribe --output-format <FMT>`
// (`TranscribeArgs::output_format`).
// (`//` comments on purpose: with `ValueEnum`, `///` doc comments on variants
// become help text for the possible values.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum OutputFormat {
    Json,
    // Treated the same as the `--ndjson` flag by `TranscribeArgs::is_ndjson`.
    Ndjson,
}
263
// Whisper models selectable on the command line (`transcribe --model` and the
// `models` subcommands).
// (`//` comments on purpose: with `ValueEnum`, `///` doc comments on variants
// become help text for the possible values.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum WhisperModel {
    Tiny,
    Base,
    Small,
    Medium,
    // CLI spelling is pinned explicitly to `large-v3`, the same text that
    // `as_str` returns for this variant.
    #[value(name = "large-v3")]
    LargeV3,
}
273
274impl WhisperModel {
275 pub fn as_str(&self) -> &'static str {
276 match self {
277 Self::Tiny => "tiny",
278 Self::Base => "base",
279 Self::Small => "small",
280 Self::Medium => "medium",
281 Self::LargeV3 => "large-v3",
282 }
283 }
284}
285
286impl std::fmt::Display for WhisperModel {
287 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
288 f.write_str(self.as_str())
289 }
290}
291
// Nested subcommands of `models` (`Commands::Models`).
// (`//` comments on purpose: with `Subcommand`, `///` doc comments on variants
// and fields become user-visible help text.)
#[derive(Debug, Subcommand)]
pub enum ModelsAction {
    // `models download [MODEL]`: the model positional is optional.
    // NOTE(review): which model is used when it is omitted is decided in the
    // handler — confirm there.
    Download {
        #[arg(value_name = "MODEL")]
        model: Option<WhisperModel>,
    },
    // `models list`: takes no arguments.
    List,
    // `models path [MODEL]`: the model positional is optional.
    // NOTE(review): behavior without a model is decided in the handler — confirm there.
    Path {
        #[arg(value_name = "MODEL")]
        model: Option<WhisperModel>,
    },
    // `models remove <MODEL> [--dry-run]`: the model positional is required.
    Remove {
        #[arg(value_name = "MODEL")]
        model: WhisperModel,
        #[arg(long)]
        dry_run: bool,
    },
}
318
/// Parses and validates a beam size given on the command line.
///
/// # Errors
///
/// Returns `"invalid integer: …"` when `s` is not a valid `i32`, and
/// `"beam size must be between 1 and 16, got …"` when the number lies outside
/// the inclusive range `1..=16`.
fn parse_beam_size(s: &str) -> Result<i32, String> {
    match s.parse::<i32>() {
        Ok(beam) if (1..=16).contains(&beam) => Ok(beam),
        Ok(beam) => Err(format!("beam size must be between 1 and 16, got {beam}")),
        Err(err) => Err(format!("invalid integer: {err}")),
    }
}
326
/// Parses and validates a VAD threshold given on the command line.
///
/// # Errors
///
/// Returns `"invalid float: …"` when `s` is not a valid `f32`, and
/// `"VAD threshold must be between 0.0 and 1.0, got …"` when the number lies
/// outside the inclusive range `0.0..=1.0`.
fn parse_vad_threshold(s: &str) -> Result<f32, String> {
    let threshold = s
        .parse::<f32>()
        .map_err(|err| format!("invalid float: {err}"))?;

    // `contains` is false for NaN, so non-numbers that parse as floats are
    // rejected together with out-of-range values.
    if (0.0..=1.0).contains(&threshold) {
        Ok(threshold)
    } else {
        Err(format!(
            "VAD threshold must be between 0.0 and 1.0, got {threshold}"
        ))
    }
}
336
/// Parses and validates a concurrency level given on the command line.
///
/// # Errors
///
/// Returns `"invalid integer: …"` when `s` is not a valid `usize`, and
/// `"concurrency must be between 1 and 32, got …"` when the number lies
/// outside the inclusive range `1..=32`.
fn parse_concurrency(s: &str) -> Result<usize, String> {
    s.parse::<usize>()
        .map_err(|err| format!("invalid integer: {err}"))
        .and_then(|workers| match workers {
            1..=32 => Ok(workers),
            _ => Err(format!(
                "concurrency must be between 1 and 32, got {workers}"
            )),
        })
}
344
/// Parses and validates a timeout, in seconds, given on the command line.
///
/// # Errors
///
/// Returns `"invalid integer: …"` when `s` is not a valid `u64`, and
/// `"timeout must be between 1 and 3600 seconds, got …"` when the number lies
/// outside the inclusive range `1..=3600`.
fn parse_timeout_secs(s: &str) -> Result<u64, String> {
    const MIN_SECS: u64 = 1;
    const MAX_SECS: u64 = 3600;

    let secs = s
        .parse::<u64>()
        .map_err(|err| format!("invalid integer: {err}"))?;

    if (MIN_SECS..=MAX_SECS).contains(&secs) {
        Ok(secs)
    } else {
        Err(format!(
            "timeout must be between 1 and 3600 seconds, got {secs}"
        ))
    }
}
354
/// Parses and validates a retry count given on the command line.
///
/// # Errors
///
/// Returns `"invalid integer: …"` when `s` is not a valid `u32`, and
/// `"retry count must be between 0 and 10, got …"` when the number is greater
/// than 10. Zero is accepted.
fn parse_retry_count(s: &str) -> Result<u32, String> {
    let retries = s
        .parse::<u32>()
        .map_err(|err| format!("invalid integer: {err}"))?;

    if retries <= 10 {
        Ok(retries)
    } else {
        Err(format!(
            "retry count must be between 0 and 10, got {retries}"
        ))
    }
}
362
/// Parses and validates the maximum total retry time, in seconds, given on
/// the command line.
///
/// # Errors
///
/// Returns `"invalid integer: …"` when `s` is not a valid `u64`, and
/// `"retry max elapsed must be between 1 and 3600 seconds, got …"` when the
/// number lies outside the inclusive range `1..=3600`.
fn parse_retry_elapsed(s: &str) -> Result<u64, String> {
    s.parse::<u64>()
        .map_err(|err| format!("invalid integer: {err}"))
        .and_then(|secs| {
            if (1..=3600).contains(&secs) {
                Ok(secs)
            } else {
                Err(format!(
                    "retry max elapsed must be between 1 and 3600 seconds, got {secs}"
                ))
            }
        })
}
372
// Unit tests for the CLI definition and for every value parser in this file.
// Each parser is checked at its accepted boundaries, just outside them, and
// with input that is not a number at all.
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // Lets clap validate the whole derived command tree (conflicting names,
    // invalid defaults, ...) in one go.
    #[test]
    fn cli_debug_assert() {
        Cli::command().debug_assert();
    }

    #[test]
    fn parse_beam_size_accepts_boundaries() {
        assert_eq!(parse_beam_size("1").unwrap(), 1);
        assert_eq!(parse_beam_size("8").unwrap(), 8);
        assert_eq!(parse_beam_size("16").unwrap(), 16);
    }

    #[test]
    fn parse_beam_size_rejects_below_range() {
        assert!(parse_beam_size("0").is_err());
        assert!(parse_beam_size("-1").is_err());
    }

    #[test]
    fn parse_beam_size_rejects_above_range() {
        assert!(parse_beam_size("17").is_err());
        assert!(parse_beam_size("100").is_err());
    }

    #[test]
    fn parse_beam_size_rejects_non_integer() {
        assert!(parse_beam_size("abc").is_err());
        assert!(parse_beam_size("1.5").is_err());
        assert!(parse_beam_size("").is_err());
    }

    #[test]
    fn parse_vad_threshold_accepts_boundaries() {
        assert!((parse_vad_threshold("0.0").unwrap() - 0.0).abs() < f32::EPSILON);
        assert!((parse_vad_threshold("0.5").unwrap() - 0.5).abs() < f32::EPSILON);
        assert!((parse_vad_threshold("1.0").unwrap() - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn parse_vad_threshold_rejects_out_of_range() {
        assert!(parse_vad_threshold("-0.1").is_err());
        assert!(parse_vad_threshold("1.5").is_err());
        assert!(parse_vad_threshold("2.0").is_err());
    }

    #[test]
    fn parse_vad_threshold_rejects_non_float() {
        assert!(parse_vad_threshold("abc").is_err());
        assert!(parse_vad_threshold("").is_err());
    }

    // "NaN" and "inf" parse successfully as `f32`, so only the range check
    // keeps them out; pin that behavior.
    #[test]
    fn parse_vad_threshold_rejects_non_finite() {
        assert!(parse_vad_threshold("NaN").is_err());
        assert!(parse_vad_threshold("inf").is_err());
        assert!(parse_vad_threshold("-inf").is_err());
    }

    #[test]
    fn parse_concurrency_accepts_boundaries() {
        assert_eq!(parse_concurrency("1").unwrap(), 1);
        assert_eq!(parse_concurrency("16").unwrap(), 16);
        assert_eq!(parse_concurrency("32").unwrap(), 32);
    }

    #[test]
    fn parse_concurrency_rejects_below_range() {
        assert!(parse_concurrency("0").is_err());
    }

    #[test]
    fn parse_concurrency_rejects_above_range() {
        assert!(parse_concurrency("33").is_err());
        assert!(parse_concurrency("1000").is_err());
    }

    #[test]
    fn parse_concurrency_rejects_non_integer() {
        assert!(parse_concurrency("-1").is_err());
        assert!(parse_concurrency("abc").is_err());
        assert!(parse_concurrency("").is_err());
    }

    #[test]
    fn parse_timeout_secs_accepts_valid_range() {
        assert_eq!(parse_timeout_secs("1").unwrap(), 1);
        assert_eq!(parse_timeout_secs("3600").unwrap(), 3600);
    }

    #[test]
    fn parse_timeout_secs_rejects_out_of_range() {
        assert!(parse_timeout_secs("0").is_err());
        assert!(parse_timeout_secs("3601").is_err());
    }

    #[test]
    fn parse_timeout_secs_rejects_non_integer() {
        assert!(parse_timeout_secs("-1").is_err());
        assert!(parse_timeout_secs("abc").is_err());
        assert!(parse_timeout_secs("").is_err());
    }

    #[test]
    fn parse_retry_count_accepts_max() {
        assert_eq!(parse_retry_count("0").unwrap(), 0);
        assert_eq!(parse_retry_count("10").unwrap(), 10);
    }

    #[test]
    fn parse_retry_count_rejects_above_max() {
        assert!(parse_retry_count("11").is_err());
        assert!(parse_retry_count("100").is_err());
    }

    #[test]
    fn parse_retry_count_rejects_non_integer() {
        assert!(parse_retry_count("-1").is_err());
        assert!(parse_retry_count("abc").is_err());
        assert!(parse_retry_count("").is_err());
    }

    #[test]
    fn parse_retry_elapsed_accepts_valid_range() {
        assert_eq!(parse_retry_elapsed("1").unwrap(), 1);
        assert_eq!(parse_retry_elapsed("3600").unwrap(), 3600);
    }

    #[test]
    fn parse_retry_elapsed_rejects_out_of_range() {
        assert!(parse_retry_elapsed("0").is_err());
        assert!(parse_retry_elapsed("3601").is_err());
    }

    #[test]
    fn parse_retry_elapsed_rejects_non_integer() {
        assert!(parse_retry_elapsed("-1").is_err());
        assert!(parse_retry_elapsed("abc").is_err());
        assert!(parse_retry_elapsed("").is_err());
    }
}