1use clap::{Arg, Command};
2use std::io::{self, IsTerminal, Read};
3use std::path::PathBuf;
4
5use crate::audio::{create_temp_audio_file, play_audio_and_cleanup};
6use crate::audio_merge::{check_ffmpeg_available, merge_audio_files};
7use crate::config::{get_presets_map, list_presets, load_config, Config};
8use crate::text_splitter::{check_text_length, split_text, MAX_CHARS};
9use crate::voicepeak::{list_emotion, list_narrator, VoicepeakCommand};
10
11pub fn build_cli() -> Command {
12 Command::new("voicepeak-cli")
13 .version("0.8.0")
14 .about("VOICEPEAK CLI wrapper with presets and auto-play")
15 .arg(
16 Arg::new("text")
17 .value_name("TEXT")
18 .help("Text to say (or pipe from stdin)")
19 .index(1),
20 )
21 .arg(
22 Arg::new("file")
23 .short('t')
24 .long("text")
25 .value_name("FILE")
26 .help("Text file to say")
27 .conflicts_with("text"),
28 )
29 .arg(
30 Arg::new("out")
31 .short('o')
32 .long("out")
33 .value_name("FILE")
34 .help("Path of output file (optional - will play with mpv if not specified)"),
35 )
36 .arg(
37 Arg::new("narrator")
38 .short('n')
39 .long("narrator")
40 .value_name("NAME")
41 .help("Name of voice"),
42 )
43 .arg(
44 Arg::new("emotion")
45 .short('e')
46 .long("emotion")
47 .value_name("EXPR")
48 .help("Emotion expression (e.g., happy=50,sad=50)"),
49 )
50 .arg(
51 Arg::new("preset")
52 .short('p')
53 .long("preset")
54 .value_name("NAME")
55 .help("Use voice preset (use --list-presets to see available presets)")
56 .conflicts_with_all(["narrator", "emotion"]),
57 )
58 .arg(
59 Arg::new("list-narrator")
60 .long("list-narrator")
61 .help("Print voice list")
62 .action(clap::ArgAction::SetTrue),
63 )
64 .arg(
65 Arg::new("list-emotion")
66 .long("list-emotion")
67 .value_name("NARRATOR")
68 .help("Print emotion list for given voice"),
69 )
70 .arg(
71 Arg::new("list-presets")
72 .long("list-presets")
73 .help("Print available presets")
74 .action(clap::ArgAction::SetTrue),
75 )
76 .arg(
77 Arg::new("speed")
78 .long("speed")
79 .value_name("VALUE")
80 .help("Speed (50 - 200)"),
81 )
82 .arg(
83 Arg::new("pitch")
84 .long("pitch")
85 .value_name("VALUE")
86 .help("Pitch (-300 - 300)"),
87 )
88 .arg(
89 Arg::new("strict-length")
90 .long("strict-length")
91 .help("Reject input longer than 140 characters (default: false, allows splitting)")
92 .action(clap::ArgAction::SetTrue),
93 )
94 .arg(
95 Arg::new("playback-mode")
96 .long("playback-mode")
97 .value_name("MODE")
98 .help("Playback mode: sequential or batch (default: batch)")
99 .value_parser(["sequential", "batch"])
100 .default_value("batch"),
101 )
102 .arg(
103 Arg::new("verbose")
104 .long("verbose")
105 .short('v')
106 .help("Enable verbose output (show VOICEPEAK debug messages)")
107 .action(clap::ArgAction::SetTrue),
108 )
109 .arg(
110 Arg::new("bg")
111 .long("bg")
112 .help("Run in background (return immediately)")
113 .action(clap::ArgAction::SetTrue),
114 )
115}
116
117pub fn handle_matches(matches: clap::ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
118 let config = load_config()?;
119
120 if matches.get_flag("list-narrator") {
121 list_narrator();
122 return Ok(());
123 }
124
125 if let Some(narrator) = matches.get_one::<String>("list-emotion") {
126 list_emotion(narrator);
127 return Ok(());
128 }
129
130 if matches.get_flag("list-presets") {
131 list_presets(&config);
132 return Ok(());
133 }
134
135 run_voicepeak(&matches, &config)
136}
137
138fn run_voicepeak(
139 matches: &clap::ArgMatches,
140 config: &Config,
141) -> Result<(), Box<dyn std::error::Error>> {
142 let input_text = if let Some(text) = matches.get_one::<String>("text") {
143 text.clone()
144 } else if let Some(file_path) = matches.get_one::<String>("file") {
145 std::fs::read_to_string(file_path)?
146 } else if !io::stdin().is_terminal() {
147 let mut buffer = String::new();
149 io::stdin().read_to_string(&mut buffer)?;
150 buffer.trim().to_string()
151 } else {
152 return Err("Either text argument, --text file, or pipe input must be specified".into());
153 };
154
155 let presets_map = get_presets_map(config);
156
157 let (narrator, emotion, preset_pitch, preset_speed) =
158 if let Some(preset_name) = matches.get_one::<String>("preset") {
159 let preset = presets_map
161 .get(preset_name)
162 .ok_or_else(|| format!("Unknown preset: {}", preset_name))?;
163 (
164 preset.narrator.clone(),
165 preset.get_emotion_string(),
166 preset.pitch,
167 preset.speed,
168 )
169 } else if let Some(default_preset_name) = &config.default_preset {
170 if let Some(default_preset) = presets_map.get(default_preset_name) {
172 let narrator = matches
174 .get_one::<String>("narrator")
175 .cloned()
176 .unwrap_or_else(|| default_preset.narrator.clone());
177 let emotion = matches
178 .get_one::<String>("emotion")
179 .cloned()
180 .unwrap_or_else(|| default_preset.get_emotion_string());
181 let preset_pitch = if matches.get_one::<String>("emotion").is_some() {
182 None } else {
184 default_preset.pitch
185 };
186 let preset_speed = default_preset.speed;
187 (narrator, emotion, preset_pitch, preset_speed)
188 } else {
189 let narrator = matches
191 .get_one::<String>("narrator")
192 .cloned()
193 .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
194 let emotion = matches
195 .get_one::<String>("emotion")
196 .cloned()
197 .unwrap_or_default();
198 (narrator, emotion, None, None)
199 }
200 } else {
201 let narrator = matches
203 .get_one::<String>("narrator")
204 .cloned()
205 .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
206 let emotion = matches
207 .get_one::<String>("emotion")
208 .cloned()
209 .unwrap_or_default();
210 (narrator, emotion, None, None)
211 };
212
213 let speed = matches
214 .get_one::<String>("speed")
215 .cloned()
216 .or_else(|| preset_speed.map(|s| s.to_string()));
217 let pitch = matches
218 .get_one::<String>("pitch")
219 .cloned()
220 .or_else(|| preset_pitch.map(|p| p.to_string()));
221 let should_play = matches.get_one::<String>("out").is_none();
222 let output_path = matches.get_one::<String>("out").map(PathBuf::from);
223 let strict_length = matches.get_flag("strict-length");
224 let playback_mode = matches.get_one::<String>("playback-mode").unwrap();
225 let verbose = matches.get_flag("verbose");
226
227 if strict_length && !check_text_length(&input_text) {
228 return Err(format!(
229 "Input text is too long ({} characters). Maximum allowed is {} characters.\nUse without --strict-length to enable automatic splitting.",
230 input_text.chars().count(),
231 MAX_CHARS
232 ).into());
233 }
234
235 let text_chunks = split_text(&input_text);
236
237 if text_chunks.len() > 1 {
238 println!(
239 "Text is too long, splitting into {} parts...",
240 text_chunks.len()
241 );
242 }
243
244 if (playback_mode == "batch" || (!should_play && text_chunks.len() > 1))
246 && !check_ffmpeg_available()
247 {
248 return Err(
249 "ffmpeg is required for batch mode and multi-chunk file output.\n\
250 Please install ffmpeg or use --playback-mode sequential for auto-play mode.\n\
251 Install ffmpeg: https://ffmpeg.org/download.html"
252 .into(),
253 );
254 }
255
256 #[cfg(unix)]
257 if matches.get_flag("bg") {
258 unsafe {
259 let pid = libc::fork();
260 match pid {
261 -1 => return Err("Failed to fork background process".into()),
262 0 => {
263 libc::setsid();
265 let devnull = libc::open(c"/dev/null".as_ptr(), libc::O_RDWR);
266 if devnull >= 0 {
267 libc::dup2(devnull, libc::STDOUT_FILENO);
268 libc::dup2(devnull, libc::STDERR_FILENO);
269 libc::close(devnull);
270 }
271 }
273 _ => {
274 return Ok(());
276 }
277 }
278 }
279 }
280
281 #[cfg(not(unix))]
282 if matches.get_flag("bg") {
283 return Err("--bg flag is only supported on Unix systems".into());
284 }
285
286 if should_play {
287 if playback_mode == "sequential" {
289 for (i, chunk) in text_chunks.iter().enumerate() {
291 if text_chunks.len() > 1 {
292 println!("Playing part {}/{}", i + 1, text_chunks.len());
293 }
294
295 let temp_path = create_temp_audio_file()?;
296
297 let mut cmd = VoicepeakCommand::new()
298 .text(chunk)
299 .narrator(&narrator)
300 .emotion(&emotion)
301 .output(&temp_path);
302
303 if let Some(speed) = &speed {
304 cmd = cmd.speed(speed);
305 }
306 if let Some(pitch) = &pitch {
307 cmd = cmd.pitch(pitch);
308 }
309
310 cmd.execute_with_verbose(verbose)?;
311 play_audio_and_cleanup(&temp_path)?;
312 }
313 } else {
314 let mut temp_files = Vec::new();
316
317 for (i, chunk) in text_chunks.iter().enumerate() {
318 if text_chunks.len() > 1 {
319 println!("Generating part {}/{}", i + 1, text_chunks.len());
320 }
321
322 let temp_path = create_temp_audio_file()?;
323
324 let mut cmd = VoicepeakCommand::new()
325 .text(chunk)
326 .narrator(&narrator)
327 .emotion(&emotion)
328 .output(&temp_path);
329
330 if let Some(speed) = &speed {
331 cmd = cmd.speed(speed);
332 }
333 if let Some(pitch) = &pitch {
334 cmd = cmd.pitch(pitch);
335 }
336
337 cmd.execute_with_verbose(verbose)?;
338 temp_files.push(temp_path);
339 }
340
341 let final_temp = create_temp_audio_file()?;
343 let temp_paths: Vec<&std::path::Path> =
344 temp_files.iter().map(|p| p.as_path()).collect();
345
346 if text_chunks.len() > 1 {
347 println!("Merging audio files...");
348 merge_audio_files(&temp_paths, &final_temp)?;
349 println!("Merge complete. Playing audio...");
350 } else {
351 merge_audio_files(&temp_paths, &final_temp)?;
352 }
353
354 for temp_file in temp_files {
356 let _ = std::fs::remove_file(temp_file);
357 }
358
359 play_audio_and_cleanup(&final_temp)?;
360 }
361 } else if let Some(output_path) = output_path {
362 let mut temp_files = Vec::new();
364
365 for (i, chunk) in text_chunks.iter().enumerate() {
366 if text_chunks.len() > 1 {
367 println!("Generating part {}/{}", i + 1, text_chunks.len());
368 }
369
370 let temp_path = create_temp_audio_file()?;
371
372 let mut cmd = VoicepeakCommand::new()
373 .text(chunk)
374 .narrator(&narrator)
375 .emotion(&emotion)
376 .output(&temp_path);
377
378 if let Some(speed) = &speed {
379 cmd = cmd.speed(speed);
380 }
381 if let Some(pitch) = &pitch {
382 cmd = cmd.pitch(pitch);
383 }
384
385 cmd.execute_with_verbose(verbose)?;
386 temp_files.push(temp_path);
387 }
388
389 let temp_paths: Vec<&std::path::Path> = temp_files.iter().map(|p| p.as_path()).collect();
391
392 if text_chunks.len() > 1 {
393 println!("Merging audio files...");
394 merge_audio_files(&temp_paths, &output_path)?;
395 println!("Merge complete.");
396 } else {
397 merge_audio_files(&temp_paths, &output_path)?;
398 }
399
400 for temp_file in temp_files {
402 let _ = std::fs::remove_file(temp_file);
403 }
404
405 println!("Audio saved to: {}", output_path.display());
406 }
407
408 Ok(())
409}