1pub mod audio;
2pub mod cli;
3pub mod commands;
4pub mod error;
5pub mod language;
6pub mod model;
7pub mod output;
8pub mod signal;
9pub mod video;
10pub mod whisper;
11
12use std::io::Write;
13use std::process::ExitCode;
14use std::time::Duration;
15
16use clap::CommandFactory;
17use clap::Parser;
18use uuid::Uuid;
19
20use crate::cli::{Cli, Commands, CommandsFormat};
21use crate::error::Error;
22use crate::language::detect::resolve_language;
23
// Timeout for the `doctor` command, in seconds. Within this file it is only
// consumed by the hidden `_DOCTOR_TIMEOUT` constant.
// NOTE(review): presumably bounds the doctor health checks — confirm in `commands::doctor`.
const DOCTOR_TIMEOUT_SECS: u64 = 5;
// Default input timeout, in seconds. Within this file it is only consumed by
// the hidden `_DEFAULT_INPUT_TIMEOUT` constant.
// NOTE(review): presumably bounds waiting on stdin — confirm against the reader code.
const DEFAULT_INPUT_TIMEOUT_SECS: u64 = 30;
26
27pub fn run() -> ExitCode {
28 signal::reset_sigpipe();
29 signal::install_handlers();
30
31 let cli = Cli::parse();
32 let correlation_id = generate_correlation_id();
33
34 init_tracing(cli.quiet, cli.verbose);
35
36 if cli.print_schema {
37 print_schema_envelope(&correlation_id);
38 return ExitCode::SUCCESS;
39 }
40
41 if cli.print_config {
42 print_config_envelope(&cli, &correlation_id);
43 return ExitCode::SUCCESS;
44 }
45
46 match cli.command {
47 Some(Commands::Transcribe(args)) => run_transcribe(args, cli.no_input, &correlation_id),
48 Some(Commands::Models { action }) => run_models(action, &correlation_id),
49 Some(Commands::Doctor) => run_doctor(&correlation_id),
50 Some(Commands::Schema) => {
51 print_schema_envelope(&correlation_id);
52 ExitCode::SUCCESS
53 }
54 Some(Commands::Config) => {
55 print_config_envelope(&cli, &correlation_id);
56 ExitCode::SUCCESS
57 }
58 Some(Commands::Completions { shell }) => {
59 run_completions(shell);
60 ExitCode::SUCCESS
61 }
62 Some(Commands::Commands { format }) => {
63 run_commands_tree(format, &correlation_id);
64 ExitCode::SUCCESS
65 }
66 Some(Commands::Init { target }) => run_init(&target, &correlation_id),
67 Some(Commands::Licenses) => run_licenses(&correlation_id),
68 Some(Commands::Resume { workflow_id }) => {
69 tracing::info!(workflow_id, "resume not yet supported in v0.1");
70 let value = serde_json::json!({
71 "schema_version": env!("CARGO_PKG_VERSION"),
72 "correlation_id": correlation_id,
73 "resume_supported": false,
74 "workflow_id": workflow_id,
75 "hint": "v0.1 does not persist checkpoints; see --dry-run for input validation",
76 });
77 let _ = output::write_json_value(&value);
78 ExitCode::SUCCESS
79 }
80 None => {
81 let err = Error::NoInput;
82 let _ = output::write_error(&err, &correlation_id);
83 err.to_exit_code()
84 }
85 }
86}
87
88fn generate_correlation_id() -> String {
89 Uuid::now_v7().to_string()
90}
91
92fn run_transcribe(args: cli::TranscribeArgs, no_input: bool, correlation_id: &str) -> ExitCode {
93 let (language, language_source) = resolve_language(args.language.as_deref());
94 tracing::info!(language, language_source, model = %args.model, "starting transcription");
95
96 let effective_no_input = no_input || is_ci();
97
98 if args.files.is_empty() && (effective_no_input || is_terminal::is_terminal(std::io::stdin())) {
99 let err = Error::NoInput;
100 let _ = output::write_error(&err, correlation_id);
101 return err.to_exit_code();
102 }
103
104 match commands::transcribe::run(&args, language, language_source, correlation_id) {
105 Ok(()) => {
106 if signal::is_shutdown_requested() {
107 ExitCode::from(signal::shutdown_signal_exit_code())
108 } else {
109 ExitCode::SUCCESS
110 }
111 }
112 Err(e) => exit_with_error(e, correlation_id),
113 }
114}
115
116fn run_models(action: cli::ModelsAction, correlation_id: &str) -> ExitCode {
117 match commands::models::run(&action, correlation_id) {
118 Ok(()) => ExitCode::SUCCESS,
119 Err(e) => exit_with_error(e, correlation_id),
120 }
121}
122
123fn run_completions(shell: clap_complete::Shell) {
124 let mut cmd = Cli::command();
125 clap_complete::generate(shell, &mut cmd, "whisper-macos-cli", &mut std::io::stdout());
126}
127
128fn run_doctor(correlation_id: &str) -> ExitCode {
129 match commands::doctor::run(correlation_id) {
130 Ok(()) => ExitCode::SUCCESS,
131 Err(e) => exit_with_error(e, correlation_id),
132 }
133}
134
135fn run_commands_tree(format: CommandsFormat, correlation_id: &str) -> ExitCode {
136 let cmd = Cli::command();
137 let name = cmd.get_name().to_string();
138 let about = cmd.get_about().map(|s| s.to_string()).unwrap_or_default();
139 let version = cmd.get_version().unwrap_or("unknown").to_string();
140
141 let subcommands: Vec<serde_json::Value> = cmd
142 .get_subcommands()
143 .map(|sc| {
144 serde_json::json!({
145 "name": sc.get_name(),
146 "about": sc.get_about().map(|a| a.to_string()).unwrap_or_default(),
147 "subcommands": collect_subs(sc),
148 })
149 })
150 .collect();
151
152 let tree = serde_json::json!({
153 "schema_version": env!("CARGO_PKG_VERSION"),
154 "correlation_id": correlation_id,
155 "name": name,
156 "about": about,
157 "version": version,
158 "subcommands": subcommands,
159 });
160
161 let result = match format {
162 CommandsFormat::Json => {
163 let _ = output::write_json_value(&tree);
164 Ok(())
165 }
166 CommandsFormat::Yaml => {
167 tracing::warn!("YAML output not yet supported; emitting JSON");
168 let _ = output::write_json_value(&tree);
169 Ok(())
170 }
171 };
172 match result {
173 Ok(()) => ExitCode::SUCCESS,
174 Err(()) => ExitCode::from(74),
175 }
176}
177
178fn collect_subs(cmd: &clap::Command) -> Vec<serde_json::Value> {
179 cmd.get_subcommands()
180 .map(|sc| {
181 serde_json::json!({
182 "name": sc.get_name(),
183 "about": sc.get_about().map(|a| a.to_string()).unwrap_or_default(),
184 "subcommands": collect_subs(sc),
185 })
186 })
187 .collect()
188}
189
190fn run_init(target: &std::path::Path, correlation_id: &str) -> ExitCode {
191 let skill_path = target.join("SKILL.md");
192 let agents_path = target.join("AGENTS.md");
193
194 let skill_content = format!(
195 "---\nname: whisper-macos-cli\nversion: {ver}\ndescription: Transcribe audio via whisper.cpp on macOS Apple Silicon\n---\n\n# whisper-macos-cli\n\nSee https://github.com/daniloaguiarbr/whisper-macos-cli for full documentation.\n",
196 ver = env!("CARGO_PKG_VERSION")
197 );
198
199 let agents_content = format!(
200 "# Agent Integration Guide\n\nGenerated by whisper-macos-cli v{ver}.\n\n## Quickstart\n\n```bash\nwhisper-macos-cli transcribe audio.ogg\n```\n\nSee `whisper-macos-cli schema` for the JSON Schema contract.\n",
201 ver = env!("CARGO_PKG_VERSION")
202 );
203
204 let result: Result<(), std::io::Error> = (|| {
205 std::fs::write(&skill_path, skill_content)?;
206 std::fs::write(&agents_path, agents_content)?;
207 Ok(())
208 })();
209
210 let value = match &result {
211 Ok(()) => serde_json::json!({
212 "schema_version": env!("CARGO_PKG_VERSION"),
213 "correlation_id": correlation_id,
214 "action": "initialized",
215 "skill": skill_path.display().to_string(),
216 "agents": agents_path.display().to_string(),
217 }),
218 Err(e) => serde_json::json!({
219 "schema_version": env!("CARGO_PKG_VERSION"),
220 "correlation_id": correlation_id,
221 "error": true,
222 "message": e.to_string(),
223 }),
224 };
225 let _ = output::write_json_value(&value);
226
227 match result {
228 Ok(()) => ExitCode::SUCCESS,
229 Err(_) => ExitCode::from(74),
230 }
231}
232
233fn run_licenses(correlation_id: &str) -> ExitCode {
234 let value = serde_json::json!({
235 "schema_version": env!("CARGO_PKG_VERSION"),
236 "correlation_id": correlation_id,
237 "license": "MIT",
238 "third_party_notice": "Run `cargo about generate about.hbs > THIRD-PARTY-LICENSES.md` to regenerate the full attribution report.",
239 "key_dependencies": [
240 {"name": "whisper.cpp", "license": "MIT", "url": "https://github.com/ggml-org/whisper.cpp"},
241 {"name": "symphonia", "license": "MPL-2.0", "url": "https://github.com/pdeljanov/symphonia"},
242 {"name": "clap", "license": "MIT OR Apache-2.0", "url": "https://github.com/clap-rs/clap"},
243 {"name": "serde", "license": "MIT OR Apache-2.0", "url": "https://github.com/serde-rs/serde"},
244 ]
245 });
246 let _ = output::write_json_value(&value);
247 ExitCode::SUCCESS
248}
249
250fn exit_with_error(e: Error, correlation_id: &str) -> ExitCode {
251 if let Error::Io(ref io_err) = e {
252 if io_err.kind() == std::io::ErrorKind::BrokenPipe {
253 let _ = std::io::stdout().flush();
254 return ExitCode::from(141);
255 }
256 }
257 let _ = output::write_error(&e, correlation_id);
258 e.to_exit_code()
259}
260
/// Reports whether the `CI` environment variable holds a truthy value.
///
/// Truthy values are `1`, `true` and `yes`, compared ASCII case-insensitively
/// so that spellings such as `True` or `YES` are accepted as well. An unset
/// variable, a non-Unicode value, or any other content yields `false`.
fn is_ci() -> bool {
    const TRUTHY: [&str; 3] = ["1", "true", "yes"];

    std::env::var("CI")
        .map(|value| TRUTHY.iter().any(|t| value.eq_ignore_ascii_case(t)))
        .unwrap_or(false)
}
267
268fn print_schema_envelope(correlation_id: &str) {
269 let result_schema = serde_json::json!({
270 "$schema": "https://json-schema.org/draft/2020-12/schema",
271 "title": "TranscriptionResult",
272 "type": "object",
273 "properties": {
274 "schema_version": { "type": "string" },
275 "correlation_id": { "type": "string" },
276 "file": { "type": "string" },
277 "language": { "type": "string" },
278 "language_source": { "type": "string", "enum": ["cli", "whisper_auto", "os_locale"] },
279 "model": { "type": "string" },
280 "duration_seconds": { "type": "number" },
281 "text": { "type": "string" },
282 "segments": { "type": "array" },
283 "vad_chunks": { "type": "integer" },
284 "processing_time_ms": { "type": "integer" }
285 },
286 "required": ["schema_version", "correlation_id", "file", "language", "model", "duration_seconds", "text", "vad_chunks", "processing_time_ms"]
287 });
288
289 let envelope = serde_json::json!({
290 "schema_version": env!("CARGO_PKG_VERSION"),
291 "correlation_id": correlation_id,
292 "agentNotes": "whisper-macos-cli emits a single JSON object per invocation. Use schema_version to gate downstream consumers. correlation_id is a UUID v7 generated per process invocation. text is NFC-normalized.",
293 "invariants": [
294 "stdout is always valid JSON or NDJSON",
295 "stderr is always human-readable logs (suppressed with --quiet)",
296 "exit codes follow sysexits.h convention",
297 "large-v3 is the default model",
298 "OGG/Opus (WhatsApp voice messages) is supported natively",
299 "output is reproducible given same input and same model"
300 ],
301 "sideEffects": [
302 "may download a model file on first use (~75MB to ~3GB)",
303 "may write to ~/Library/Application Support/whisper-macos-cli/models/",
304 "may load ~3GB into unified memory on Apple Silicon"
305 ],
306 "idempotent": true,
307 "checkpointable": false,
308 "tokenBudget": {
309 "invocation_overhead": 200,
310 "per_file_transcription": "50 + transcribed text length"
311 },
312 "result_schema": result_schema,
313 "error_schema": {
314 "type": "object",
315 "required": ["schema_version", "error", "code", "message", "category", "retryable", "docs_url", "correlation_id"],
316 "properties": {
317 "schema_version": { "type": "string" },
318 "error": { "type": "boolean" },
319 "code": { "type": "integer" },
320 "message": { "type": "string" },
321 "category": { "type": "string", "enum": ["usage", "input", "data", "config", "service", "internal", "io"] },
322 "retryable": { "type": "boolean" },
323 "retry_after_ms": { "type": ["integer", "null"] },
324 "hint": { "type": ["string", "null"] },
325 "docs_url": { "type": "string" },
326 "correlation_id": { "type": "string" }
327 }
328 }
329 });
330 let _ = output::write_schema(&envelope);
331}
332
333fn print_config_envelope(cli: &Cli, correlation_id: &str) {
334 let value = serde_json::json!({
335 "schema_version": env!("CARGO_PKG_VERSION"),
336 "correlation_id": correlation_id,
337 "config": {
338 "quiet": cli.quiet,
339 "verbose": cli.verbose,
340 "no_input": cli.no_input,
341 "ci_mode": is_ci(),
342 "color": format!("{:?}", cli.color),
343 }
344 });
345 let _ = output::write_json_value(&value);
346}
347
348fn init_tracing(quiet: bool, verbose: u8) {
349 use tracing_subscriber::EnvFilter;
350
351 if quiet {
352 return;
353 }
354
355 let level = match verbose {
356 0 => "warn",
357 1 => "info",
358 2 => "debug",
359 _ => "trace",
360 };
361
362 let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level));
363
364 if is_terminal::is_terminal(std::io::stderr()) {
365 let _ = tracing_subscriber::fmt()
366 .with_env_filter(filter)
367 .with_writer(std::io::stderr)
368 .with_target(false)
369 .try_init();
370 } else {
371 let _ = tracing_subscriber::fmt()
372 .with_env_filter(filter)
373 .with_writer(std::io::stderr)
374 .with_target(false)
375 .json()
376 .try_init();
377 }
378}
379
// `DOCTOR_TIMEOUT_SECS` expressed as a `Duration`; public but hidden from rustdoc.
// NOTE(review): the underscore prefix and `doc(hidden)` suggest this exists
// mainly to keep the seconds constant referenced — confirm intended consumers.
#[doc(hidden)]
pub const _DOCTOR_TIMEOUT: Duration = Duration::from_secs(DOCTOR_TIMEOUT_SECS);
// `DEFAULT_INPUT_TIMEOUT_SECS` expressed as a `Duration`; public but hidden from rustdoc.
#[doc(hidden)]
pub const _DEFAULT_INPUT_TIMEOUT: Duration = Duration::from_secs(DEFAULT_INPUT_TIMEOUT_SECS);