Skip to main content

sparrow/tools/
tts.rs

1//! Text-to-Speech tool for Sparrow.
2//!
3//! Converts text to speech audio using various providers (edge-tts by default).
4//! Saves audio files in the Sparrow state directory.
5
6use std::path::PathBuf;
7use std::process::Command;
8
/// Available TTS providers.
///
/// A field-less selector passed to [`text_to_speech`] to choose the backend.
/// It is `Copy` and comparable so callers can keep using a value after passing
/// it on, and can test which provider was chosen.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TtsProvider {
    /// Microsoft Edge TTS (free, built-in voices)
    Edge,
    /// OpenAI TTS API
    OpenAI,
    /// System `say` command (macOS)
    Say,
    /// System `espeak` command (Linux)
    Espeak,
}
21
22/// Convert text to speech and save to a file.
23///
24/// Returns the path to the generated audio file.
25pub fn text_to_speech(
26    text: &str,
27    provider: TtsProvider,
28    output_dir: Option<PathBuf>,
29) -> anyhow::Result<PathBuf> {
30    let dir = output_dir.unwrap_or_else(|| {
31        let mut d = dirs::state_dir().unwrap_or_else(|| PathBuf::from("."));
32        d.push("sparrow");
33        d.push("audio");
34        d
35    });
36    std::fs::create_dir_all(&dir)?;
37
38    let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S");
39    let output_path = dir.join(format!("tts_{timestamp}.mp3"));
40
41    match provider {
42        TtsProvider::Edge => tts_edge(text, &output_path)?,
43        TtsProvider::OpenAI => tts_openai(text, &output_path)?,
44        TtsProvider::Say => tts_say(text, &output_path)?,
45        TtsProvider::Espeak => tts_espeak(text, &output_path)?,
46    }
47
48    Ok(output_path)
49}
50
51/// Use Microsoft Edge TTS (free, no API key needed).
52fn tts_edge(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
53    // edge-tts is a Python package: pip install edge-tts
54    let status = Command::new("edge-tts")
55        .args([
56            "--text",
57            text,
58            "--voice",
59            "fr-FR-DeniseNeural",
60            "--write-media",
61            &output.to_string_lossy(),
62        ])
63        .stdout(std::process::Stdio::null())
64        .stderr(std::process::Stdio::null())
65        .status();
66
67    match status {
68        Ok(s) if s.success() => Ok(()),
69        Ok(_) => {
70            // Try English fallback
71            Command::new("edge-tts")
72                .args([
73                    "--text",
74                    text,
75                    "--voice",
76                    "en-US-JennyNeural",
77                    "--write-media",
78                    &output.to_string_lossy(),
79                ])
80                .status()?;
81            Ok(())
82        }
83        Err(_) => {
84            anyhow::bail!(
85                "edge-tts not found. Install it with: pip install edge-tts\n\
86                 Or use another provider: text_to_speech(text, provider=TtsProvider::Espeak)"
87            );
88        }
89    }
90}
91
92/// Use OpenAI TTS API.
93fn tts_openai(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
94    let api_key = std::env::var("OPENAI_API_KEY")
95        .or_else(|_| std::env::var("OPENAI_TTS_KEY"))
96        .map_err(|_| anyhow::anyhow!("OPENAI_API_KEY environment variable not set"))?;
97
98    let client = reqwest::blocking::Client::new();
99    let resp = client
100        .post("https://api.openai.com/v1/audio/speech")
101        .header("Authorization", format!("Bearer {}", api_key))
102        .json(&serde_json::json!({
103            "model": "tts-1",
104            "input": text,
105            "voice": "alloy",
106            "response_format": "mp3",
107        }))
108        .send()?;
109
110    if !resp.status().is_success() {
111        let body = resp.text()?;
112        anyhow::bail!("OpenAI TTS failed: {body}");
113    }
114
115    let bytes = resp.bytes()?;
116    std::fs::write(output, bytes)?;
117    Ok(())
118}
119
120/// Use macOS `say` command.
121fn tts_say(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
122    let status = Command::new("say")
123        .args(["-o", &output.with_extension("aiff").to_string_lossy(), text])
124        .status()?;
125
126    if !status.success() {
127        anyhow::bail!("say command failed");
128    }
129    Ok(())
130}
131
132/// Use Linux `espeak` command.
133fn tts_espeak(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
134    let wav_path = output.with_extension("wav");
135    let status = Command::new("espeak")
136        .args(["-w", &wav_path.to_string_lossy(), text])
137        .status()?;
138
139    if !status.success() {
140        anyhow::bail!("espeak command failed. Install: sudo apt install espeak");
141    }
142    Ok(())
143}