use std::path::{Path, PathBuf};
use voxtus::audio::{check_ffmpeg, convert_to_mp3};
use voxtus::cli::Args;
use voxtus::config::{AVAILABLE_MODELS, Config, OutputFormat, is_url};
use voxtus::download::download_audio_sync;
use voxtus::formats::Transcript;
use voxtus::logging::setup_logger;
use voxtus::signals::{setup_signal_handlers, shutdown_requested};
use voxtus::transcribe::transcribe;
/// Program entry point.
///
/// Tries to install the signal handlers; a failure is only reported as a
/// warning on stderr and does not stop the program. Control then passes to
/// [`run`], and the process exits with the code it returns.
fn main() {
    let installed = setup_signal_handlers();
    if let Err(reason) = installed {
        eprintln!("Warning: Failed to set up signal handlers: {}", reason);
    }

    std::process::exit(run());
}
fn run() -> i32 {
let args = Args::parse_args();
if args.list_models {
print_available_models();
return 0;
}
let config = match Config::from_args(&args) {
Ok(c) => c,
Err(e) => {
eprintln!("Error: {}", e);
return 1;
}
};
if let Err(e) = setup_logger(config.verbose_level) {
eprintln!("Error initializing logger: {}", e);
return 1;
}
if let Err(e) = check_ffmpeg() {
log::error!("{}", e);
log::error!(" - macOS: brew install ffmpeg");
log::error!(" - Ubuntu/Debian: sudo apt install ffmpeg");
log::error!(" - Windows: Download from https://ffmpeg.org/download.html");
return 1;
}
match process(&config) {
Ok(()) => 0,
Err(e) => {
log::error!("{}", e);
1
}
}
}
/// Runs the whole pipeline for one input: obtain an MP3, transcribe it, and
/// write the requested outputs.
///
/// All intermediate files live in a directory created by `tempfile::tempdir()`
/// (presumably cleaned up when the handle is dropped — confirm against the
/// `tempfile` crate documentation).
///
/// A shutdown request observed after audio preparation or after transcription
/// ends the function early with `Ok(())`; nothing further is written.
///
/// # Errors
///
/// Propagates any error from temp-dir creation, download/conversion,
/// transcription, writing the transcript, or copying the kept audio file.
fn process(config: &Config) -> voxtus::Result<()> {
    let workspace = tempfile::tempdir()?;
    let work_dir = workspace.path();

    // Logs and reports a pending shutdown request in one step.
    let interrupted = || -> bool {
        let stop = shutdown_requested();
        if stop {
            log::info!("Interrupted, exiting.");
        }
        stop
    };

    // URLs are downloaded first; local files are converted (or copied) directly.
    let prepared = if is_url(&config.input_path) {
        download_and_convert(config, work_dir)
    } else {
        convert_local_file(config, work_dir)
    };
    let (audio_path, title) = prepared?;

    if interrupted() {
        return Ok(());
    }

    let transcript = transcribe(
        &audio_path,
        work_dir,
        &title,
        &config.input_path,
        &config.model,
    )?;

    if interrupted() {
        return Ok(());
    }

    output_transcript(&transcript, config)?;

    if !config.keep_audio {
        return Ok(());
    }

    // Keep a copy of the MP3 next to the transcripts, named like them.
    let audio_file_name = format!("{}.mp3", get_output_name(&title, config));
    let final_audio = config.output_dir.join(audio_file_name);
    std::fs::copy(&audio_path, &final_audio)?;
    if !config.stdout_mode {
        log::info!("Audio saved: {}", final_audio.display());
    }

    Ok(())
}
/// Downloads the audio for the URL in `config.input_path` into `temp_dir` and
/// converts it to `temp_dir/audio.mp3`.
///
/// Returns the path of the converted MP3 together with the title reported by
/// the downloader. Progress messages are logged only when
/// `config.stdout_mode` is off.
///
/// # Errors
///
/// Propagates failures from `download_audio_sync` and `convert_to_mp3`.
fn download_and_convert(config: &Config, temp_dir: &Path) -> voxtus::Result<(PathBuf, String)> {
    let log_progress = !config.stdout_mode;
    let url = &config.input_path;

    if log_progress {
        log::info!("Downloading: {}", url);
    }
    let (raw_audio, info) = download_audio_sync(url, temp_dir)?;
    if log_progress {
        log::info!("Downloaded: {}", info.title);
    }

    let target = temp_dir.join("audio.mp3");
    convert_to_mp3(&raw_audio, &target)?;

    Ok((target, info.title))
}
/// Prepares a local input file as `temp_dir/audio.mp3`.
///
/// A file whose extension is `mp3` in any letter case (`.mp3`, `.MP3`, ...)
/// is copied as-is; anything else goes through `convert_to_mp3`. The returned
/// title is the input's file stem, or `"audio"` when the stem is missing or
/// not valid UTF-8.
///
/// # Errors
///
/// Returns `voxtus::Error::FileNotFound` when `config.input_path` does not
/// exist, and propagates copy or conversion failures.
fn convert_local_file(config: &Config, temp_dir: &Path) -> voxtus::Result<(PathBuf, String)> {
    let input_path = Path::new(&config.input_path);
    if !input_path.exists() {
        return Err(voxtus::Error::FileNotFound(config.input_path.clone()));
    }

    let title = input_path
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or("audio")
        .to_string();

    if !config.stdout_mode {
        log::info!("Converting: {}", input_path.display());
    }

    // Both branches produce the same destination file.
    let audio_path = temp_dir.join("audio.mp3");

    // Compare the extension case-insensitively so that e.g. `talk.MP3` is
    // copied rather than needlessly re-encoded.
    let already_mp3 = input_path
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("mp3"));

    if already_mp3 {
        std::fs::copy(input_path, &audio_path)?;
    } else {
        convert_to_mp3(input_path, &audio_path)?;
    }

    Ok((audio_path, title))
}
/// Renders `transcript` in every format listed in `config.formats` and emits
/// each rendering.
///
/// In stdout mode each rendering is printed to stdout. Otherwise it is written
/// to `<output_dir>/<name>.<extension>`. If that file already exists and
/// `config.overwrite_files` is off, the user is asked on stderr to confirm;
/// only an answer of `y` (case-insensitive, surrounding whitespace ignored)
/// proceeds.
///
/// # Errors
///
/// Returns `voxtus::Error::UserAborted` when the overwrite is declined or
/// stdin cannot be read, and propagates file write failures.
fn output_transcript(transcript: &Transcript, config: &Config) -> voxtus::Result<()> {
    let base_name = get_output_name(&transcript.metadata.title, config);

    for format in &config.formats {
        let rendered = match format {
            OutputFormat::Txt => transcript.to_txt(),
            OutputFormat::Json => transcript.to_json(),
            OutputFormat::Srt => transcript.to_srt(),
            OutputFormat::Vtt => transcript.to_vtt(),
        };

        if config.stdout_mode {
            println!("{}", rendered);
            continue;
        }

        let file_name = format!("{}.{}", base_name, format.extension());
        let destination = config.output_dir.join(file_name);

        let needs_confirmation = destination.exists() && !config.overwrite_files;
        if needs_confirmation {
            eprint!("File '{}' exists. Overwrite? [y/N] ", destination.display());

            let mut answer = String::new();
            // A read failure counts as "no", same as any answer other than `y`.
            let confirmed = match std::io::stdin().read_line(&mut answer) {
                Ok(_) => answer.trim().eq_ignore_ascii_case("y"),
                Err(_) => false,
            };
            if !confirmed {
                return Err(voxtus::Error::UserAborted);
            }
        }

        std::fs::write(&destination, rendered)?;
        log::info!("Saved: {}", destination.display());
    }

    Ok(())
}
/// Returns the base file name (without extension) shared by all output files.
///
/// `config.custom_name` takes precedence when set and is used verbatim.
/// Otherwise the name is derived from `title`, with every path separator and
/// NUL character replaced by `_`.
///
/// The replacement matters because callers pass the result to
/// `output_dir.join(...)`. A title such as `AC/DC` would otherwise point into
/// a sub-directory that does not exist, and a title containing `../` could
/// place the file outside `output_dir`.
fn get_output_name(title: &str, config: &Config) -> String {
    config.custom_name.clone().unwrap_or_else(|| {
        title
            .chars()
            .map(|c| {
                // `is_separator` covers `/` everywhere and `\` on Windows.
                if std::path::is_separator(c) || c == '\0' {
                    '_'
                } else {
                    c
                }
            })
            .collect()
    })
}
/// Prints the known Whisper models, grouped by size, followed by usage examples.
///
/// Only names listed in the groups below are looked up in `AVAILABLE_MODELS`;
/// a listed name with no matching entry is skipped silently.
fn print_available_models() {
    // Display order and grouping of the model names.
    const GROUPS: [(&str, &[&str]); 5] = [
        ("Tiny Models", &["tiny", "tiny.en"]),
        ("Base Models", &["base", "base.en"]),
        ("Small Models", &["small", "small.en"]),
        ("Medium Models", &["medium", "medium.en"]),
        ("Large Models", &["large", "large-v2", "large-v3"]),
    ];

    println!("Available Whisper Models:\n");

    for (heading, wanted_names) in GROUPS {
        println!("{}:", heading);

        for wanted in wanted_names.iter() {
            let Some(model) = AVAILABLE_MODELS.iter().find(|m| m.name == *wanted) else {
                continue;
            };
            println!(" {:<18} - {}", model.name, model.description);
            println!(
                " {} params, {} VRAM, {}",
                model.params, model.vram, model.languages
            );
        }

        println!();
    }

    println!("Examples:");
    let examples = [
        " voxtus --model tiny video.mp4 # Fastest transcription",
        " voxtus --model small video.mp4 # Good balance (default)",
        " voxtus --model large-v3 video.mp4 # Best accuracy",
        " voxtus --model small.en audio.mp3 # English-only, faster",
    ];
    for example in examples {
        println!("{}", example);
    }
}