1pub mod audio;
2pub mod cli;
3pub mod commands;
4pub mod error;
5pub mod language;
6pub mod model;
7pub mod output;
8pub mod signal;
9pub mod whisper;
10
11use std::io::Write;
12use std::process::ExitCode;
13use std::time::Duration;
14
15use clap::CommandFactory;
16use clap::Parser;
17use uuid::Uuid;
18
19use crate::cli::{Cli, Commands, CommandsFormat};
20use crate::error::Error;
21use crate::language::detect::resolve_language;
22
// Timeout budget, in seconds, backing the hidden `_DOCTOR_TIMEOUT` duration.
// NOTE(review): presumably consumed by the `doctor` command — confirm at the call site.
const DOCTOR_TIMEOUT_SECS: u64 = 5;
// Timeout budget, in seconds, backing the hidden `_DEFAULT_INPUT_TIMEOUT` duration.
// NOTE(review): presumably bounds waiting on input — confirm at the call site.
const DEFAULT_INPUT_TIMEOUT_SECS: u64 = 30;
25
26pub fn run() -> ExitCode {
27 signal::reset_sigpipe();
28 signal::install_handlers();
29
30 let cli = Cli::parse();
31 let correlation_id = generate_correlation_id();
32
33 init_tracing(cli.quiet, cli.verbose);
34
35 if cli.print_schema {
36 print_schema_envelope(&correlation_id);
37 return ExitCode::SUCCESS;
38 }
39
40 if cli.print_config {
41 print_config_envelope(&cli, &correlation_id);
42 return ExitCode::SUCCESS;
43 }
44
45 match cli.command {
46 Some(Commands::Transcribe(args)) => run_transcribe(args, cli.no_input, &correlation_id),
47 Some(Commands::Models { action }) => run_models(action, &correlation_id),
48 Some(Commands::Doctor) => run_doctor(&correlation_id),
49 Some(Commands::Schema) => {
50 print_schema_envelope(&correlation_id);
51 ExitCode::SUCCESS
52 }
53 Some(Commands::Config) => {
54 print_config_envelope(&cli, &correlation_id);
55 ExitCode::SUCCESS
56 }
57 Some(Commands::Completions { shell }) => {
58 run_completions(shell);
59 ExitCode::SUCCESS
60 }
61 Some(Commands::Commands { format }) => {
62 run_commands_tree(format, &correlation_id);
63 ExitCode::SUCCESS
64 }
65 Some(Commands::Init { target }) => run_init(&target, &correlation_id),
66 Some(Commands::Licenses) => run_licenses(&correlation_id),
67 Some(Commands::Resume { workflow_id }) => {
68 tracing::info!(workflow_id, "resume not yet supported in v0.1");
69 let value = serde_json::json!({
70 "schema_version": env!("CARGO_PKG_VERSION"),
71 "correlation_id": correlation_id,
72 "resume_supported": false,
73 "workflow_id": workflow_id,
74 "hint": "v0.1 does not persist checkpoints; see --dry-run for input validation",
75 });
76 let _ = output::write_json_value(&value);
77 ExitCode::SUCCESS
78 }
79 None => {
80 let err = Error::NoInput;
81 let _ = output::write_error(&err, &correlation_id);
82 err.to_exit_code()
83 }
84 }
85}
86
87fn generate_correlation_id() -> String {
88 Uuid::now_v7().to_string()
89}
90
91fn run_transcribe(args: cli::TranscribeArgs, no_input: bool, correlation_id: &str) -> ExitCode {
92 let (language, language_source) = resolve_language(args.language.as_deref());
93 tracing::info!(language, language_source, model = %args.model, "starting transcription");
94
95 let effective_no_input = no_input || is_ci();
96
97 if args.files.is_empty() && (effective_no_input || is_terminal::is_terminal(std::io::stdin())) {
98 let err = Error::NoInput;
99 let _ = output::write_error(&err, correlation_id);
100 return err.to_exit_code();
101 }
102
103 match commands::transcribe::run(&args, language, language_source, correlation_id) {
104 Ok(()) => {
105 if signal::is_shutdown_requested() {
106 ExitCode::from(signal::shutdown_signal_exit_code())
107 } else {
108 ExitCode::SUCCESS
109 }
110 }
111 Err(e) => exit_with_error(e, correlation_id),
112 }
113}
114
115fn run_models(action: cli::ModelsAction, correlation_id: &str) -> ExitCode {
116 match commands::models::run(&action, correlation_id) {
117 Ok(()) => ExitCode::SUCCESS,
118 Err(e) => exit_with_error(e, correlation_id),
119 }
120}
121
122fn run_completions(shell: clap_complete::Shell) {
123 let mut cmd = Cli::command();
124 clap_complete::generate(shell, &mut cmd, "whisper-macos-cli", &mut std::io::stdout());
125}
126
127fn run_doctor(correlation_id: &str) -> ExitCode {
128 match commands::doctor::run(correlation_id) {
129 Ok(()) => ExitCode::SUCCESS,
130 Err(e) => exit_with_error(e, correlation_id),
131 }
132}
133
134fn run_commands_tree(format: CommandsFormat, correlation_id: &str) -> ExitCode {
135 let cmd = Cli::command();
136 let name = cmd.get_name().to_string();
137 let about = cmd.get_about().map(|s| s.to_string()).unwrap_or_default();
138 let version = cmd.get_version().unwrap_or("unknown").to_string();
139
140 let subcommands: Vec<serde_json::Value> = cmd
141 .get_subcommands()
142 .map(|sc| {
143 serde_json::json!({
144 "name": sc.get_name(),
145 "about": sc.get_about().map(|a| a.to_string()).unwrap_or_default(),
146 "subcommands": collect_subs(sc),
147 })
148 })
149 .collect();
150
151 let tree = serde_json::json!({
152 "schema_version": env!("CARGO_PKG_VERSION"),
153 "correlation_id": correlation_id,
154 "name": name,
155 "about": about,
156 "version": version,
157 "subcommands": subcommands,
158 });
159
160 let result = match format {
161 CommandsFormat::Json => {
162 let _ = output::write_json_value(&tree);
163 Ok(())
164 }
165 CommandsFormat::Yaml => {
166 tracing::warn!("YAML output not yet supported; emitting JSON");
167 let _ = output::write_json_value(&tree);
168 Ok(())
169 }
170 };
171 match result {
172 Ok(()) => ExitCode::SUCCESS,
173 Err(()) => ExitCode::from(74),
174 }
175}
176
177fn collect_subs(cmd: &clap::Command) -> Vec<serde_json::Value> {
178 cmd.get_subcommands()
179 .map(|sc| {
180 serde_json::json!({
181 "name": sc.get_name(),
182 "about": sc.get_about().map(|a| a.to_string()).unwrap_or_default(),
183 "subcommands": collect_subs(sc),
184 })
185 })
186 .collect()
187}
188
189fn run_init(target: &std::path::Path, correlation_id: &str) -> ExitCode {
190 let skill_path = target.join("SKILL.md");
191 let agents_path = target.join("AGENTS.md");
192
193 let skill_content = format!(
194 "---\nname: whisper-macos-cli\nversion: {ver}\ndescription: Transcribe audio via whisper.cpp on macOS Apple Silicon\n---\n\n# whisper-macos-cli\n\nSee https://github.com/daniloaguiarbr/whisper-macos-cli for full documentation.\n",
195 ver = env!("CARGO_PKG_VERSION")
196 );
197
198 let agents_content = format!(
199 "# Agent Integration Guide\n\nGenerated by whisper-macos-cli v{ver}.\n\n## Quickstart\n\n```bash\nwhisper-macos-cli transcribe audio.ogg\n```\n\nSee `whisper-macos-cli schema` for the JSON Schema contract.\n",
200 ver = env!("CARGO_PKG_VERSION")
201 );
202
203 let result: Result<(), std::io::Error> = (|| {
204 std::fs::write(&skill_path, skill_content)?;
205 std::fs::write(&agents_path, agents_content)?;
206 Ok(())
207 })();
208
209 let value = match &result {
210 Ok(()) => serde_json::json!({
211 "schema_version": env!("CARGO_PKG_VERSION"),
212 "correlation_id": correlation_id,
213 "action": "initialized",
214 "skill": skill_path.display().to_string(),
215 "agents": agents_path.display().to_string(),
216 }),
217 Err(e) => serde_json::json!({
218 "schema_version": env!("CARGO_PKG_VERSION"),
219 "correlation_id": correlation_id,
220 "error": true,
221 "message": e.to_string(),
222 }),
223 };
224 let _ = output::write_json_value(&value);
225
226 match result {
227 Ok(()) => ExitCode::SUCCESS,
228 Err(_) => ExitCode::from(74),
229 }
230}
231
232fn run_licenses(correlation_id: &str) -> ExitCode {
233 let value = serde_json::json!({
234 "schema_version": env!("CARGO_PKG_VERSION"),
235 "correlation_id": correlation_id,
236 "license": "MIT",
237 "third_party_notice": "Run `cargo about generate about.hbs > THIRD-PARTY-LICENSES.md` to regenerate the full attribution report.",
238 "key_dependencies": [
239 {"name": "whisper.cpp", "license": "MIT", "url": "https://github.com/ggml-org/whisper.cpp"},
240 {"name": "symphonia", "license": "MPL-2.0", "url": "https://github.com/pdeljanov/symphonia"},
241 {"name": "clap", "license": "MIT OR Apache-2.0", "url": "https://github.com/clap-rs/clap"},
242 {"name": "serde", "license": "MIT OR Apache-2.0", "url": "https://github.com/serde-rs/serde"},
243 ]
244 });
245 let _ = output::write_json_value(&value);
246 ExitCode::SUCCESS
247}
248
249fn exit_with_error(e: Error, correlation_id: &str) -> ExitCode {
250 if let Error::Io(ref io_err) = e {
251 if io_err.kind() == std::io::ErrorKind::BrokenPipe {
252 let _ = std::io::stdout().flush();
253 return ExitCode::from(141);
254 }
255 }
256 let _ = output::write_error(&e, correlation_id);
257 e.to_exit_code()
258}
259
/// Reports whether the process is running under CI, based on the `CI`
/// environment variable.
///
/// The variable counts as set when its value is `1`, `true` or `yes`,
/// compared ASCII case-insensitively, so spellings such as `True` or `YES`
/// are recognised alongside `true` / `TRUE`. An unset, non-Unicode, or any
/// other value yields `false`.
fn is_ci() -> bool {
    const TRUTHY: [&str; 3] = ["1", "true", "yes"];

    std::env::var("CI")
        .map(|value| TRUTHY.iter().any(|accepted| value.eq_ignore_ascii_case(accepted)))
        .unwrap_or(false)
}
266
267fn print_schema_envelope(correlation_id: &str) {
268 let result_schema = serde_json::json!({
269 "$schema": "https://json-schema.org/draft/2020-12/schema",
270 "title": "TranscriptionResult",
271 "type": "object",
272 "properties": {
273 "schema_version": { "type": "string" },
274 "correlation_id": { "type": "string" },
275 "file": { "type": "string" },
276 "language": { "type": "string" },
277 "language_source": { "type": "string", "enum": ["cli", "whisper_auto", "os_locale"] },
278 "model": { "type": "string" },
279 "duration_seconds": { "type": "number" },
280 "text": { "type": "string" },
281 "segments": { "type": "array" },
282 "vad_chunks": { "type": "integer" },
283 "processing_time_ms": { "type": "integer" }
284 },
285 "required": ["schema_version", "correlation_id", "file", "language", "model", "duration_seconds", "text", "vad_chunks", "processing_time_ms"]
286 });
287
288 let envelope = serde_json::json!({
289 "schema_version": env!("CARGO_PKG_VERSION"),
290 "correlation_id": correlation_id,
291 "agentNotes": "whisper-macos-cli emits a single JSON object per invocation. Use schema_version to gate downstream consumers. correlation_id is a UUID v7 generated per process invocation. text is NFC-normalized.",
292 "invariants": [
293 "stdout is always valid JSON or NDJSON",
294 "stderr is always human-readable logs (suppressed with --quiet)",
295 "exit codes follow sysexits.h convention",
296 "large-v3 is the default model",
297 "OGG/Opus (WhatsApp voice messages) is supported natively",
298 "output is reproducible given same input and same model"
299 ],
300 "sideEffects": [
301 "may download a model file on first use (~75MB to ~3GB)",
302 "may write to ~/Library/Application Support/whisper-macos-cli/models/",
303 "may load ~3GB into unified memory on Apple Silicon"
304 ],
305 "idempotent": true,
306 "checkpointable": false,
307 "tokenBudget": {
308 "invocation_overhead": 200,
309 "per_file_transcription": "50 + transcribed text length"
310 },
311 "result_schema": result_schema,
312 "error_schema": {
313 "type": "object",
314 "required": ["schema_version", "error", "code", "message", "category", "retryable", "docs_url", "correlation_id"],
315 "properties": {
316 "schema_version": { "type": "string" },
317 "error": { "type": "boolean" },
318 "code": { "type": "integer" },
319 "message": { "type": "string" },
320 "category": { "type": "string", "enum": ["usage", "input", "data", "config", "service", "internal", "io"] },
321 "retryable": { "type": "boolean" },
322 "retry_after_ms": { "type": ["integer", "null"] },
323 "hint": { "type": ["string", "null"] },
324 "docs_url": { "type": "string" },
325 "correlation_id": { "type": "string" }
326 }
327 }
328 });
329 let _ = output::write_schema(&envelope);
330}
331
332fn print_config_envelope(cli: &Cli, correlation_id: &str) {
333 let value = serde_json::json!({
334 "schema_version": env!("CARGO_PKG_VERSION"),
335 "correlation_id": correlation_id,
336 "config": {
337 "quiet": cli.quiet,
338 "verbose": cli.verbose,
339 "no_input": cli.no_input,
340 "ci_mode": is_ci(),
341 "color": format!("{:?}", cli.color),
342 }
343 });
344 let _ = output::write_json_value(&value);
345}
346
347fn init_tracing(quiet: bool, verbose: u8) {
348 use tracing_subscriber::EnvFilter;
349
350 if quiet {
351 return;
352 }
353
354 let level = match verbose {
355 0 => "warn",
356 1 => "info",
357 2 => "debug",
358 _ => "trace",
359 };
360
361 let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level));
362
363 if is_terminal::is_terminal(std::io::stderr()) {
364 let _ = tracing_subscriber::fmt()
365 .with_env_filter(filter)
366 .with_writer(std::io::stderr)
367 .with_target(false)
368 .try_init();
369 } else {
370 let _ = tracing_subscriber::fmt()
371 .with_env_filter(filter)
372 .with_writer(std::io::stderr)
373 .with_target(false)
374 .json()
375 .try_init();
376 }
377}
378
// `DOCTOR_TIMEOUT_SECS` expressed as a `Duration`; public but hidden from rustdoc.
// NOTE(review): presumably exported so the private constant is reachable
// outside this module — confirm against its users.
#[doc(hidden)]
pub const _DOCTOR_TIMEOUT: Duration = Duration::from_secs(DOCTOR_TIMEOUT_SECS);
// `DEFAULT_INPUT_TIMEOUT_SECS` expressed as a `Duration`; public but hidden from rustdoc.
#[doc(hidden)]
pub const _DEFAULT_INPUT_TIMEOUT: Duration = Duration::from_secs(DEFAULT_INPUT_TIMEOUT_SECS);