//! voicepeak_cli/cli.rs — CLI argument definitions and dispatch.
1use clap::{Arg, Command};
2use std::io::{self, IsTerminal, Read};
3use std::path::PathBuf;
4
5use crate::audio::{create_temp_audio_file, play_audio_and_cleanup};
6use crate::audio_merge::{check_ffmpeg_available, merge_audio_files};
7use crate::config::{get_presets_map, list_presets, load_config, Config};
8use crate::text_splitter::{check_text_length, split_text, MAX_CHARS};
9use crate::voicepeak::{list_emotion, list_narrator, VoicepeakCommand};
10
11pub fn build_cli() -> Command {
12    Command::new("voicepeak-cli")
13        .version("0.8.0")
14        .about("VOICEPEAK CLI wrapper with presets and auto-play")
15        .arg(
16            Arg::new("text")
17                .value_name("TEXT")
18                .help("Text to say (or pipe from stdin)")
19                .index(1),
20        )
21        .arg(
22            Arg::new("file")
23                .short('t')
24                .long("text")
25                .value_name("FILE")
26                .help("Text file to say")
27                .conflicts_with("text"),
28        )
29        .arg(
30            Arg::new("out")
31                .short('o')
32                .long("out")
33                .value_name("FILE")
34                .help("Path of output file (optional - will play with mpv if not specified)"),
35        )
36        .arg(
37            Arg::new("narrator")
38                .short('n')
39                .long("narrator")
40                .value_name("NAME")
41                .help("Name of voice"),
42        )
43        .arg(
44            Arg::new("emotion")
45                .short('e')
46                .long("emotion")
47                .value_name("EXPR")
48                .help("Emotion expression (e.g., happy=50,sad=50)"),
49        )
50        .arg(
51            Arg::new("preset")
52                .short('p')
53                .long("preset")
54                .value_name("NAME")
55                .help("Use voice preset (use --list-presets to see available presets)")
56                .conflicts_with_all(["narrator", "emotion"]),
57        )
58        .arg(
59            Arg::new("list-narrator")
60                .long("list-narrator")
61                .help("Print voice list")
62                .action(clap::ArgAction::SetTrue),
63        )
64        .arg(
65            Arg::new("list-emotion")
66                .long("list-emotion")
67                .value_name("NARRATOR")
68                .help("Print emotion list for given voice"),
69        )
70        .arg(
71            Arg::new("list-presets")
72                .long("list-presets")
73                .help("Print available presets")
74                .action(clap::ArgAction::SetTrue),
75        )
76        .arg(
77            Arg::new("speed")
78                .long("speed")
79                .value_name("VALUE")
80                .help("Speed (50 - 200)"),
81        )
82        .arg(
83            Arg::new("pitch")
84                .long("pitch")
85                .value_name("VALUE")
86                .help("Pitch (-300 - 300)"),
87        )
88        .arg(
89            Arg::new("strict-length")
90                .long("strict-length")
91                .help("Reject input longer than 140 characters (default: false, allows splitting)")
92                .action(clap::ArgAction::SetTrue),
93        )
94        .arg(
95            Arg::new("playback-mode")
96                .long("playback-mode")
97                .value_name("MODE")
98                .help("Playback mode: sequential or batch (default: batch)")
99                .value_parser(["sequential", "batch"])
100                .default_value("batch"),
101        )
102        .arg(
103            Arg::new("verbose")
104                .long("verbose")
105                .short('v')
106                .help("Enable verbose output (show VOICEPEAK debug messages)")
107                .action(clap::ArgAction::SetTrue),
108        )
109        .arg(
110            Arg::new("bg")
111                .long("bg")
112                .help("Run in background (return immediately)")
113                .action(clap::ArgAction::SetTrue),
114        )
115}
116
117pub fn handle_matches(matches: clap::ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
118    let config = load_config()?;
119
120    if matches.get_flag("list-narrator") {
121        list_narrator();
122        return Ok(());
123    }
124
125    if let Some(narrator) = matches.get_one::<String>("list-emotion") {
126        list_emotion(narrator);
127        return Ok(());
128    }
129
130    if matches.get_flag("list-presets") {
131        list_presets(&config);
132        return Ok(());
133    }
134
135    run_voicepeak(&matches, &config)
136}
137
138fn run_voicepeak(
139    matches: &clap::ArgMatches,
140    config: &Config,
141) -> Result<(), Box<dyn std::error::Error>> {
142    let input_text = if let Some(text) = matches.get_one::<String>("text") {
143        text.clone()
144    } else if let Some(file_path) = matches.get_one::<String>("file") {
145        std::fs::read_to_string(file_path)?
146    } else if !io::stdin().is_terminal() {
147        // Read from stdin if available (pipe input)
148        let mut buffer = String::new();
149        io::stdin().read_to_string(&mut buffer)?;
150        buffer.trim().to_string()
151    } else {
152        return Err("Either text argument, --text file, or pipe input must be specified".into());
153    };
154
155    let presets_map = get_presets_map(config);
156
157    let (narrator, emotion, preset_pitch, preset_speed) =
158        if let Some(preset_name) = matches.get_one::<String>("preset") {
159            // Explicit preset specified via -p option
160            let preset = presets_map
161                .get(preset_name)
162                .ok_or_else(|| format!("Unknown preset: {}", preset_name))?;
163            (
164                preset.narrator.clone(),
165                preset.get_emotion_string(),
166                preset.pitch,
167                preset.speed,
168            )
169        } else if let Some(default_preset_name) = &config.default_preset {
170            // No preset specified, but default_preset exists in config
171            if let Some(default_preset) = presets_map.get(default_preset_name) {
172                // Use default preset, but allow individual overrides
173                let narrator = matches
174                    .get_one::<String>("narrator")
175                    .cloned()
176                    .unwrap_or_else(|| default_preset.narrator.clone());
177                let emotion = matches
178                    .get_one::<String>("emotion")
179                    .cloned()
180                    .unwrap_or_else(|| default_preset.get_emotion_string());
181                let preset_pitch = if matches.get_one::<String>("emotion").is_some() {
182                    None // If emotion is overridden, don't use preset pitch
183                } else {
184                    default_preset.pitch
185                };
186                let preset_speed = default_preset.speed;
187                (narrator, emotion, preset_pitch, preset_speed)
188            } else {
189                // Default preset not found, fallback to manual settings
190                let narrator = matches
191                    .get_one::<String>("narrator")
192                    .cloned()
193                    .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
194                let emotion = matches
195                    .get_one::<String>("emotion")
196                    .cloned()
197                    .unwrap_or_default();
198                (narrator, emotion, None, None)
199            }
200        } else {
201            // No preset and no default_preset, use manual settings only
202            let narrator = matches
203                .get_one::<String>("narrator")
204                .cloned()
205                .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
206            let emotion = matches
207                .get_one::<String>("emotion")
208                .cloned()
209                .unwrap_or_default();
210            (narrator, emotion, None, None)
211        };
212
213    let speed = matches
214        .get_one::<String>("speed")
215        .cloned()
216        .or_else(|| preset_speed.map(|s| s.to_string()));
217    let pitch = matches
218        .get_one::<String>("pitch")
219        .cloned()
220        .or_else(|| preset_pitch.map(|p| p.to_string()));
221    let should_play = matches.get_one::<String>("out").is_none();
222    let output_path = matches.get_one::<String>("out").map(PathBuf::from);
223    let strict_length = matches.get_flag("strict-length");
224    let playback_mode = matches.get_one::<String>("playback-mode").unwrap();
225    let verbose = matches.get_flag("verbose");
226
227    if strict_length && !check_text_length(&input_text) {
228        return Err(format!(
229            "Input text is too long ({} characters). Maximum allowed is {} characters.\nUse without --strict-length to enable automatic splitting.",
230            input_text.chars().count(),
231            MAX_CHARS
232        ).into());
233    }
234
235    let text_chunks = split_text(&input_text);
236
237    if text_chunks.len() > 1 {
238        println!(
239            "Text is too long, splitting into {} parts...",
240            text_chunks.len()
241        );
242    }
243
244    // Check ffmpeg availability for batch mode
245    if (playback_mode == "batch" || (!should_play && text_chunks.len() > 1))
246        && !check_ffmpeg_available()
247    {
248        return Err(
249            "ffmpeg is required for batch mode and multi-chunk file output.\n\
250            Please install ffmpeg or use --playback-mode sequential for auto-play mode.\n\
251            Install ffmpeg: https://ffmpeg.org/download.html"
252                .into(),
253        );
254    }
255
256    #[cfg(unix)]
257    if matches.get_flag("bg") {
258        unsafe {
259            let pid = libc::fork();
260            match pid {
261                -1 => return Err("Failed to fork background process".into()),
262                0 => {
263                    // Child: detach session and suppress output
264                    libc::setsid();
265                    let devnull = libc::open(c"/dev/null".as_ptr(), libc::O_RDWR);
266                    if devnull >= 0 {
267                        libc::dup2(devnull, libc::STDOUT_FILENO);
268                        libc::dup2(devnull, libc::STDERR_FILENO);
269                        libc::close(devnull);
270                    }
271                    // Child continues execution below
272                }
273                _ => {
274                    // Parent: return immediately
275                    return Ok(());
276                }
277            }
278        }
279    }
280
281    #[cfg(not(unix))]
282    if matches.get_flag("bg") {
283        return Err("--bg flag is only supported on Unix systems".into());
284    }
285
286    if should_play {
287        // Auto-play mode
288        if playback_mode == "sequential" {
289            // Sequential mode: generate and play one by one
290            for (i, chunk) in text_chunks.iter().enumerate() {
291                if text_chunks.len() > 1 {
292                    println!("Playing part {}/{}", i + 1, text_chunks.len());
293                }
294
295                let temp_path = create_temp_audio_file()?;
296
297                let mut cmd = VoicepeakCommand::new()
298                    .text(chunk)
299                    .narrator(&narrator)
300                    .emotion(&emotion)
301                    .output(&temp_path);
302
303                if let Some(speed) = &speed {
304                    cmd = cmd.speed(speed);
305                }
306                if let Some(pitch) = &pitch {
307                    cmd = cmd.pitch(pitch);
308                }
309
310                cmd.execute_with_verbose(verbose)?;
311                play_audio_and_cleanup(&temp_path)?;
312            }
313        } else {
314            // Batch mode: generate all, merge, then play
315            let mut temp_files = Vec::new();
316
317            for (i, chunk) in text_chunks.iter().enumerate() {
318                if text_chunks.len() > 1 {
319                    println!("Generating part {}/{}", i + 1, text_chunks.len());
320                }
321
322                let temp_path = create_temp_audio_file()?;
323
324                let mut cmd = VoicepeakCommand::new()
325                    .text(chunk)
326                    .narrator(&narrator)
327                    .emotion(&emotion)
328                    .output(&temp_path);
329
330                if let Some(speed) = &speed {
331                    cmd = cmd.speed(speed);
332                }
333                if let Some(pitch) = &pitch {
334                    cmd = cmd.pitch(pitch);
335                }
336
337                cmd.execute_with_verbose(verbose)?;
338                temp_files.push(temp_path);
339            }
340
341            // Merge and play
342            let final_temp = create_temp_audio_file()?;
343            let temp_paths: Vec<&std::path::Path> =
344                temp_files.iter().map(|p| p.as_path()).collect();
345
346            if text_chunks.len() > 1 {
347                println!("Merging audio files...");
348                merge_audio_files(&temp_paths, &final_temp)?;
349                println!("Merge complete. Playing audio...");
350            } else {
351                merge_audio_files(&temp_paths, &final_temp)?;
352            }
353
354            // Cleanup individual temp files
355            for temp_file in temp_files {
356                let _ = std::fs::remove_file(temp_file);
357            }
358
359            play_audio_and_cleanup(&final_temp)?;
360        }
361    } else if let Some(output_path) = output_path {
362        // File output mode
363        let mut temp_files = Vec::new();
364
365        for (i, chunk) in text_chunks.iter().enumerate() {
366            if text_chunks.len() > 1 {
367                println!("Generating part {}/{}", i + 1, text_chunks.len());
368            }
369
370            let temp_path = create_temp_audio_file()?;
371
372            let mut cmd = VoicepeakCommand::new()
373                .text(chunk)
374                .narrator(&narrator)
375                .emotion(&emotion)
376                .output(&temp_path);
377
378            if let Some(speed) = &speed {
379                cmd = cmd.speed(speed);
380            }
381            if let Some(pitch) = &pitch {
382                cmd = cmd.pitch(pitch);
383            }
384
385            cmd.execute_with_verbose(verbose)?;
386            temp_files.push(temp_path);
387        }
388
389        // Merge to final output
390        let temp_paths: Vec<&std::path::Path> = temp_files.iter().map(|p| p.as_path()).collect();
391
392        if text_chunks.len() > 1 {
393            println!("Merging audio files...");
394            merge_audio_files(&temp_paths, &output_path)?;
395            println!("Merge complete.");
396        } else {
397            merge_audio_files(&temp_paths, &output_path)?;
398        }
399
400        // Cleanup temp files
401        for temp_file in temp_files {
402            let _ = std::fs::remove_file(temp_file);
403        }
404
405        println!("Audio saved to: {}", output_path.display());
406    }
407
408    Ok(())
409}