Skip to main content

sparrow/tools/
tts.rs

1//! Text-to-Speech tool for Sparrow.
2//!
3//! Converts text to speech audio using various providers (edge-tts by default).
4//! Saves audio files in the Sparrow state directory.
5
6use std::path::PathBuf;
7use std::process::Command;
8
/// Speech back-ends that [`text_to_speech`] can dispatch to.
///
/// The enum carries no data, so it derives `PartialEq`/`Eq` in addition to
/// `Debug`/`Clone`; this lets callers compare a selected provider directly
/// (e.g. `provider == TtsProvider::Edge`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TtsProvider {
    /// Microsoft Edge TTS (free, built-in voices); driven through the
    /// `edge-tts` command-line tool.
    Edge,
    /// OpenAI TTS API (HTTP request, needs an API key in the environment).
    OpenAI,
    /// System `say` command (macOS).
    Say,
    /// System `espeak` command (Linux).
    Espeak,
}
21
22/// Convert text to speech and save to a file.
23///
24/// Returns the path to the generated audio file.
25pub fn text_to_speech(
26    text: &str,
27    provider: TtsProvider,
28    output_dir: Option<PathBuf>,
29) -> anyhow::Result<PathBuf> {
30    let dir = output_dir.unwrap_or_else(|| {
31        let mut d = dirs::state_dir().unwrap_or_else(|| PathBuf::from("."));
32        d.push("sparrow");
33        d.push("audio");
34        d
35    });
36    std::fs::create_dir_all(&dir)?;
37
38    let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S");
39    let output_path = dir.join(format!("tts_{timestamp}.mp3"));
40
41    match provider {
42        TtsProvider::Edge => tts_edge(text, &output_path)?,
43        TtsProvider::OpenAI => tts_openai(text, &output_path)?,
44        TtsProvider::Say => tts_say(text, &output_path)?,
45        TtsProvider::Espeak => tts_espeak(text, &output_path)?,
46    }
47
48    Ok(output_path)
49}
50
51/// Use Microsoft Edge TTS (free, no API key needed).
52fn tts_edge(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
53    // edge-tts is a Python package: pip install edge-tts
54    let status = Command::new("edge-tts")
55        .args([
56            "--text", text,
57            "--voice", "fr-FR-DeniseNeural",
58            "--write-media", &output.to_string_lossy(),
59        ])
60        .stdout(std::process::Stdio::null())
61        .stderr(std::process::Stdio::null())
62        .status();
63
64    match status {
65        Ok(s) if s.success() => Ok(()),
66        Ok(_) => {
67            // Try English fallback
68            Command::new("edge-tts")
69                .args([
70                    "--text", text,
71                    "--voice", "en-US-JennyNeural",
72                    "--write-media", &output.to_string_lossy(),
73                ])
74                .status()?;
75            Ok(())
76        }
77        Err(_) => {
78            anyhow::bail!(
79                "edge-tts not found. Install it with: pip install edge-tts\n\
80                 Or use another provider: text_to_speech(text, provider=TtsProvider::Espeak)"
81            );
82        }
83    }
84}
85
86/// Use OpenAI TTS API.
87fn tts_openai(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
88    let api_key = std::env::var("OPENAI_API_KEY")
89        .or_else(|_| std::env::var("OPENAI_TTS_KEY"))
90        .map_err(|_| anyhow::anyhow!("OPENAI_API_KEY environment variable not set"))?;
91
92    let client = reqwest::blocking::Client::new();
93    let resp = client
94        .post("https://api.openai.com/v1/audio/speech")
95        .header("Authorization", format!("Bearer {}", api_key))
96        .json(&serde_json::json!({
97            "model": "tts-1",
98            "input": text,
99            "voice": "alloy",
100            "response_format": "mp3",
101        }))
102        .send()?;
103
104    if !resp.status().is_success() {
105        let body = resp.text()?;
106        anyhow::bail!("OpenAI TTS failed: {body}");
107    }
108
109    let bytes = resp.bytes()?;
110    std::fs::write(output, bytes)?;
111    Ok(())
112}
113
114/// Use macOS `say` command.
115fn tts_say(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
116    let status = Command::new("say")
117        .args(["-o", &output.with_extension("aiff").to_string_lossy(), text])
118        .status()?;
119
120    if !status.success() {
121        anyhow::bail!("say command failed");
122    }
123    Ok(())
124}
125
126/// Use Linux `espeak` command.
127fn tts_espeak(text: &str, output: &std::path::Path) -> anyhow::Result<()> {
128    let wav_path = output.with_extension("wav");
129    let status = Command::new("espeak")
130        .args(["-w", &wav_path.to_string_lossy(), text])
131        .status()?;
132
133    if !status.success() {
134        anyhow::bail!("espeak command failed. Install: sudo apt install espeak");
135    }
136    Ok(())
137}