voicepeak_cli/
cli.rs

1use clap::{Arg, Command};
2use std::io::{self, IsTerminal, Read};
3use std::path::PathBuf;
4
5use crate::audio::{create_temp_audio_file, play_audio_and_cleanup};
6use crate::audio_merge::{check_ffmpeg_available, merge_audio_files};
7use crate::config::{get_presets_map, list_presets, load_config, Config};
8use crate::text_splitter::{check_text_length, split_text, MAX_CHARS};
9use crate::voicepeak::{list_emotion, list_narrator, VoicepeakCommand};
10
11pub fn build_cli() -> Command {
12    Command::new("voicepeak-cli")
13        .version("0.6.0")
14        .about("VOICEPEAK CLI wrapper with presets and auto-play")
15        .arg(
16            Arg::new("text")
17                .value_name("TEXT")
18                .help("Text to say (or pipe from stdin)")
19                .index(1),
20        )
21        .arg(
22            Arg::new("file")
23                .short('t')
24                .long("text")
25                .value_name("FILE")
26                .help("Text file to say")
27                .conflicts_with("text"),
28        )
29        .arg(
30            Arg::new("out")
31                .short('o')
32                .long("out")
33                .value_name("FILE")
34                .help("Path of output file (optional - will play with mpv if not specified)"),
35        )
36        .arg(
37            Arg::new("narrator")
38                .short('n')
39                .long("narrator")
40                .value_name("NAME")
41                .help("Name of voice"),
42        )
43        .arg(
44            Arg::new("emotion")
45                .short('e')
46                .long("emotion")
47                .value_name("EXPR")
48                .help("Emotion expression (e.g., happy=50,sad=50)"),
49        )
50        .arg(
51            Arg::new("preset")
52                .short('p')
53                .long("preset")
54                .value_name("NAME")
55                .help("Use voice preset (karin-normal, karin-happy, karin-angry, karin-sad, karin-whisper)")
56                .conflicts_with_all(["narrator", "emotion"]),
57        )
58        .arg(
59            Arg::new("list-narrator")
60                .long("list-narrator")
61                .help("Print voice list")
62                .action(clap::ArgAction::SetTrue),
63        )
64        .arg(
65            Arg::new("list-emotion")
66                .long("list-emotion")
67                .value_name("NARRATOR")
68                .help("Print emotion list for given voice"),
69        )
70        .arg(
71            Arg::new("list-presets")
72                .long("list-presets")
73                .help("Print available presets")
74                .action(clap::ArgAction::SetTrue),
75        )
76        .arg(
77            Arg::new("speed")
78                .long("speed")
79                .value_name("VALUE")
80                .help("Speed (50 - 200)"),
81        )
82        .arg(
83            Arg::new("pitch")
84                .long("pitch")
85                .value_name("VALUE")
86                .help("Pitch (-300 - 300)"),
87        )
88        .arg(
89            Arg::new("strict-length")
90                .long("strict-length")
91                .help("Reject input longer than 140 characters (default: false, allows splitting)")
92                .action(clap::ArgAction::SetTrue),
93        )
94        .arg(
95            Arg::new("playback-mode")
96                .long("playback-mode")
97                .value_name("MODE")
98                .help("Playback mode: sequential or batch (default: batch)")
99                .value_parser(["sequential", "batch"])
100                .default_value("batch"),
101        )
102        .arg(
103            Arg::new("verbose")
104                .long("verbose")
105                .short('v')
106                .help("Enable verbose output (show VOICEPEAK debug messages)")
107                .action(clap::ArgAction::SetTrue),
108        )
109}
110
111pub fn handle_matches(matches: clap::ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
112    let config = load_config()?;
113
114    if matches.get_flag("list-narrator") {
115        list_narrator();
116        return Ok(());
117    }
118
119    if let Some(narrator) = matches.get_one::<String>("list-emotion") {
120        list_emotion(narrator);
121        return Ok(());
122    }
123
124    if matches.get_flag("list-presets") {
125        list_presets(&config);
126        return Ok(());
127    }
128
129    run_voicepeak(&matches, &config)
130}
131
132fn run_voicepeak(
133    matches: &clap::ArgMatches,
134    config: &Config,
135) -> Result<(), Box<dyn std::error::Error>> {
136    let input_text = if let Some(text) = matches.get_one::<String>("text") {
137        text.clone()
138    } else if let Some(file_path) = matches.get_one::<String>("file") {
139        std::fs::read_to_string(file_path)?
140    } else if !io::stdin().is_terminal() {
141        // Read from stdin if available (pipe input)
142        let mut buffer = String::new();
143        io::stdin().read_to_string(&mut buffer)?;
144        buffer.trim().to_string()
145    } else {
146        return Err("Either text argument, --text file, or pipe input must be specified".into());
147    };
148
149    let presets_map = get_presets_map(config);
150
151    let (narrator, emotion, preset_pitch, preset_speed) =
152        if let Some(preset_name) = matches.get_one::<String>("preset") {
153            // Explicit preset specified via -p option
154            let preset = presets_map
155                .get(preset_name)
156                .ok_or_else(|| format!("Unknown preset: {}", preset_name))?;
157            (
158                preset.narrator.clone(),
159                preset.get_emotion_string(),
160                preset.pitch,
161                preset.speed,
162            )
163        } else if let Some(default_preset_name) = &config.default_preset {
164            // No preset specified, but default_preset exists in config
165            if let Some(default_preset) = presets_map.get(default_preset_name) {
166                // Use default preset, but allow individual overrides
167                let narrator = matches
168                    .get_one::<String>("narrator")
169                    .cloned()
170                    .unwrap_or_else(|| default_preset.narrator.clone());
171                let emotion = matches
172                    .get_one::<String>("emotion")
173                    .cloned()
174                    .unwrap_or_else(|| default_preset.get_emotion_string());
175                let preset_pitch = if matches.get_one::<String>("emotion").is_some() {
176                    None // If emotion is overridden, don't use preset pitch
177                } else {
178                    default_preset.pitch
179                };
180                let preset_speed = default_preset.speed;
181                (narrator, emotion, preset_pitch, preset_speed)
182            } else {
183                // Default preset not found, fallback to manual settings
184                let narrator = matches
185                    .get_one::<String>("narrator")
186                    .cloned()
187                    .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
188                let emotion = matches
189                    .get_one::<String>("emotion")
190                    .cloned()
191                    .unwrap_or_default();
192                (narrator, emotion, None, None)
193            }
194        } else {
195            // No preset and no default_preset, use manual settings only
196            let narrator = matches
197                .get_one::<String>("narrator")
198                .cloned()
199                .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
200            let emotion = matches
201                .get_one::<String>("emotion")
202                .cloned()
203                .unwrap_or_default();
204            (narrator, emotion, None, None)
205        };
206
207    let speed = matches
208        .get_one::<String>("speed")
209        .cloned()
210        .or_else(|| preset_speed.map(|s| s.to_string()));
211    let pitch = matches
212        .get_one::<String>("pitch")
213        .cloned()
214        .or_else(|| preset_pitch.map(|p| p.to_string()));
215    let should_play = matches.get_one::<String>("out").is_none();
216    let output_path = matches.get_one::<String>("out").map(PathBuf::from);
217    let strict_length = matches.get_flag("strict-length");
218    let playback_mode = matches.get_one::<String>("playback-mode").unwrap();
219    let verbose = matches.get_flag("verbose");
220
221    if strict_length && !check_text_length(&input_text) {
222        return Err(format!(
223            "Input text is too long ({} characters). Maximum allowed is {} characters.\nUse without --strict-length to enable automatic splitting.",
224            input_text.chars().count(),
225            MAX_CHARS
226        ).into());
227    }
228
229    let text_chunks = split_text(&input_text);
230
231    if text_chunks.len() > 1 {
232        println!(
233            "Text is too long, splitting into {} parts...",
234            text_chunks.len()
235        );
236    }
237
238    // Check ffmpeg availability for batch mode
239    if (playback_mode == "batch" || (!should_play && text_chunks.len() > 1))
240        && !check_ffmpeg_available()
241    {
242        return Err(
243            "ffmpeg is required for batch mode and multi-chunk file output.\n\
244            Please install ffmpeg or use --playback-mode sequential for auto-play mode.\n\
245            Install ffmpeg: https://ffmpeg.org/download.html"
246                .into(),
247        );
248    }
249
250    if should_play {
251        // Auto-play mode
252        if playback_mode == "sequential" {
253            // Sequential mode: generate and play one by one
254            for (i, chunk) in text_chunks.iter().enumerate() {
255                if text_chunks.len() > 1 {
256                    println!("Playing part {}/{}", i + 1, text_chunks.len());
257                }
258
259                let temp_path = create_temp_audio_file()?;
260
261                let mut cmd = VoicepeakCommand::new()
262                    .text(chunk)
263                    .narrator(&narrator)
264                    .emotion(&emotion)
265                    .output(&temp_path);
266
267                if let Some(speed) = &speed {
268                    cmd = cmd.speed(&speed);
269                }
270                if let Some(pitch) = &pitch {
271                    cmd = cmd.pitch(pitch);
272                }
273
274                cmd.execute_with_verbose(verbose)?;
275                play_audio_and_cleanup(&temp_path)?;
276            }
277        } else {
278            // Batch mode: generate all, merge, then play
279            let mut temp_files = Vec::new();
280
281            for (i, chunk) in text_chunks.iter().enumerate() {
282                if text_chunks.len() > 1 {
283                    println!("Generating part {}/{}", i + 1, text_chunks.len());
284                }
285
286                let temp_path = create_temp_audio_file()?;
287
288                let mut cmd = VoicepeakCommand::new()
289                    .text(chunk)
290                    .narrator(&narrator)
291                    .emotion(&emotion)
292                    .output(&temp_path);
293
294                if let Some(speed) = &speed {
295                    cmd = cmd.speed(&speed);
296                }
297                if let Some(pitch) = &pitch {
298                    cmd = cmd.pitch(pitch);
299                }
300
301                cmd.execute_with_verbose(verbose)?;
302                temp_files.push(temp_path);
303            }
304
305            // Merge and play
306            let final_temp = create_temp_audio_file()?;
307            let temp_paths: Vec<&std::path::Path> =
308                temp_files.iter().map(|p| p.as_path()).collect();
309
310            if text_chunks.len() > 1 {
311                println!("Merging audio files...");
312                merge_audio_files(&temp_paths, &final_temp)?;
313                println!("Merge complete. Playing audio...");
314            } else {
315                merge_audio_files(&temp_paths, &final_temp)?;
316            }
317
318            // Cleanup individual temp files
319            for temp_file in temp_files {
320                let _ = std::fs::remove_file(temp_file);
321            }
322
323            play_audio_and_cleanup(&final_temp)?;
324        }
325    } else if let Some(output_path) = output_path {
326        // File output mode
327        let mut temp_files = Vec::new();
328
329        for (i, chunk) in text_chunks.iter().enumerate() {
330            if text_chunks.len() > 1 {
331                println!("Generating part {}/{}", i + 1, text_chunks.len());
332            }
333
334            let temp_path = create_temp_audio_file()?;
335
336            let mut cmd = VoicepeakCommand::new()
337                .text(chunk)
338                .narrator(&narrator)
339                .emotion(&emotion)
340                .output(&temp_path);
341
342            if let Some(speed) = &speed {
343                cmd = cmd.speed(&speed);
344            }
345            if let Some(pitch) = &pitch {
346                cmd = cmd.pitch(pitch);
347            }
348
349            cmd.execute_with_verbose(verbose)?;
350            temp_files.push(temp_path);
351        }
352
353        // Merge to final output
354        let temp_paths: Vec<&std::path::Path> = temp_files.iter().map(|p| p.as_path()).collect();
355
356        if text_chunks.len() > 1 {
357            println!("Merging audio files...");
358            merge_audio_files(&temp_paths, &output_path)?;
359            println!("Merge complete.");
360        } else {
361            merge_audio_files(&temp_paths, &output_path)?;
362        }
363
364        // Cleanup temp files
365        for temp_file in temp_files {
366            let _ = std::fs::remove_file(temp_file);
367        }
368
369        println!("Audio saved to: {}", output_path.display());
370    }
371
372    Ok(())
373}