1use std::path::{Path, PathBuf};
6use std::process::Command;
7
/// Speech-to-text engine that [`transcribe`] dispatches to.
///
/// The enum carries no data, so it is `Copy` and comparable; callers can keep
/// a chosen backend around and reuse or compare it without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SttBackend {
    /// Remote transcription through the OpenAI HTTP API.
    OpenAIApi,
    /// Local transcription by running the `whisper` command-line tool.
    WhisperCli,
    /// Local transcription by running the `whisper-cpp` command-line tool.
    WhisperCpp,
}
18
19pub fn transcribe(
23 audio_path: &Path,
24 language: Option<&str>,
25 backend: SttBackend,
26) -> anyhow::Result<String> {
27 if !audio_path.exists() {
28 anyhow::bail!("Audio file not found: {}", audio_path.display());
29 }
30
31 match backend {
32 SttBackend::OpenAIApi => transcribe_openai(audio_path, language),
33 SttBackend::WhisperCli => transcribe_whisper_cli(audio_path, language),
34 SttBackend::WhisperCpp => transcribe_whisper_cpp(audio_path, language),
35 }
36}
37
38fn transcribe_openai(audio_path: &Path, language: Option<&str>) -> anyhow::Result<String> {
40 let api_key = std::env::var("OPENAI_API_KEY")
41 .map_err(|_| anyhow::anyhow!("OPENAI_API_KEY environment variable not set"))?;
42
43 let client = reqwest::blocking::Client::new();
44 let form = reqwest::blocking::multipart::Form::new()
45 .file("file", audio_path)?
46 .text("model", "whisper-1");
47
48 let form = if let Some(lang) = language {
49 form.text("language", lang.to_string())
50 } else {
51 form
52 };
53
54 let resp = client
55 .post("https://api.openai.com/v1/audio/transcriptions")
56 .header("Authorization", format!("Bearer {}", api_key))
57 .multipart(form)
58 .send()?;
59
60 if !resp.status().is_success() {
61 let body = resp.text()?;
62 anyhow::bail!("OpenAI Whisper failed: {body}");
63 }
64
65 let json: serde_json::Value = resp.json()?;
66 json.get("text")
67 .and_then(|t| t.as_str())
68 .map(|t| t.to_string())
69 .ok_or_else(|| anyhow::anyhow!("No transcription in response"))
70}
71
72fn transcribe_whisper_cli(audio_path: &Path, language: Option<&str>) -> anyhow::Result<String> {
74 let mut cmd = Command::new("whisper");
75 cmd.arg(audio_path);
76
77 if let Some(lang) = language {
78 cmd.args(["--language", lang]);
79 }
80
81 cmd.arg("--output_format").arg("txt");
82 cmd.arg("--output_dir")
83 .arg(audio_path.parent().unwrap_or(Path::new(".")));
84
85 let output = cmd.output()?;
86
87 if !output.status.success() {
88 let stderr = String::from_utf8_lossy(&output.stderr);
89 anyhow::bail!("whisper CLI failed: {stderr}\nInstall: pip install openai-whisper");
90 }
91
92 let txt_path = audio_path.with_extension("txt");
94 if txt_path.exists() {
95 Ok(std::fs::read_to_string(&txt_path)?)
96 } else {
97 Ok(String::from_utf8_lossy(&output.stdout).to_string())
99 }
100}
101
102fn transcribe_whisper_cpp(audio_path: &Path, language: Option<&str>) -> anyhow::Result<String> {
104 let mut cmd = Command::new("whisper-cpp");
105 cmd.arg("-f").arg(audio_path);
106
107 if let Some(lang) = language {
108 cmd.arg("-l").arg(lang);
109 }
110
111 cmd.arg("-otxt");
112
113 let output = cmd.output()?;
114
115 if !output.status.success() {
116 let stderr = String::from_utf8_lossy(&output.stderr);
117 anyhow::bail!("whisper-cpp failed: {stderr}");
118 }
119
120 let txt_path = audio_path.with_extension("txt");
122 if txt_path.exists() {
123 Ok(std::fs::read_to_string(&txt_path)?)
124 } else {
125 Ok(String::from_utf8_lossy(&output.stdout).to_string())
126 }
127}