// voicepeak_cli/cli.rs

1use clap::{Arg, Command};
2use std::io::{self, IsTerminal, Read};
3use std::path::PathBuf;
4
5use crate::audio::{create_temp_audio_file, play_audio_and_cleanup};
6use crate::audio_merge::{check_ffmpeg_available, merge_audio_files};
7use crate::config::{get_presets_map, list_presets, load_config, Config};
8use crate::text_splitter::{check_text_length, split_text, MAX_CHARS};
9use crate::voicepeak::{list_emotion, list_narrator, VoicepeakCommand};
10
11pub fn build_cli() -> Command {
12    Command::new("voicepeak-cli")
13        .version("0.4.2")
14        .about("VOICEPEAK CLI wrapper with presets and auto-play")
15        .arg(
16            Arg::new("text")
17                .value_name("TEXT")
18                .help("Text to say (or pipe from stdin)")
19                .index(1),
20        )
21        .arg(
22            Arg::new("file")
23                .short('t')
24                .long("text")
25                .value_name("FILE")
26                .help("Text file to say")
27                .conflicts_with("text"),
28        )
29        .arg(
30            Arg::new("out")
31                .short('o')
32                .long("out")
33                .value_name("FILE")
34                .help("Path of output file (optional - will play with mpv if not specified)"),
35        )
36        .arg(
37            Arg::new("narrator")
38                .short('n')
39                .long("narrator")
40                .value_name("NAME")
41                .help("Name of voice"),
42        )
43        .arg(
44            Arg::new("emotion")
45                .short('e')
46                .long("emotion")
47                .value_name("EXPR")
48                .help("Emotion expression (e.g., happy=50,sad=50)"),
49        )
50        .arg(
51            Arg::new("preset")
52                .short('p')
53                .long("preset")
54                .value_name("NAME")
55                .help("Use voice preset (karin-normal, karin-happy, karin-angry, karin-sad, karin-whisper)")
56                .conflicts_with_all(["narrator", "emotion"]),
57        )
58        .arg(
59            Arg::new("list-narrator")
60                .long("list-narrator")
61                .help("Print voice list")
62                .action(clap::ArgAction::SetTrue),
63        )
64        .arg(
65            Arg::new("list-emotion")
66                .long("list-emotion")
67                .value_name("NARRATOR")
68                .help("Print emotion list for given voice"),
69        )
70        .arg(
71            Arg::new("list-presets")
72                .long("list-presets")
73                .help("Print available presets")
74                .action(clap::ArgAction::SetTrue),
75        )
76        .arg(
77            Arg::new("speed")
78                .long("speed")
79                .value_name("VALUE")
80                .help("Speed (50 - 200)"),
81        )
82        .arg(
83            Arg::new("pitch")
84                .long("pitch")
85                .value_name("VALUE")
86                .help("Pitch (-300 - 300)"),
87        )
88        .arg(
89            Arg::new("strict-length")
90                .long("strict-length")
91                .help("Reject input longer than 140 characters (default: false, allows splitting)")
92                .action(clap::ArgAction::SetTrue),
93        )
94        .arg(
95            Arg::new("playback-mode")
96                .long("playback-mode")
97                .value_name("MODE")
98                .help("Playback mode: sequential or batch (default: batch)")
99                .value_parser(["sequential", "batch"])
100                .default_value("batch"),
101        )
102        .arg(
103            Arg::new("verbose")
104                .long("verbose")
105                .short('v')
106                .help("Enable verbose output (show VOICEPEAK debug messages)")
107                .action(clap::ArgAction::SetTrue),
108        )
109}
110
111pub fn handle_matches(matches: clap::ArgMatches) -> Result<(), Box<dyn std::error::Error>> {
112    let config = load_config()?;
113
114    if matches.get_flag("list-narrator") {
115        list_narrator();
116        return Ok(());
117    }
118
119    if let Some(narrator) = matches.get_one::<String>("list-emotion") {
120        list_emotion(narrator);
121        return Ok(());
122    }
123
124    if matches.get_flag("list-presets") {
125        list_presets(&config);
126        return Ok(());
127    }
128
129    run_voicepeak(&matches, &config)
130}
131
132fn run_voicepeak(
133    matches: &clap::ArgMatches,
134    config: &Config,
135) -> Result<(), Box<dyn std::error::Error>> {
136    let input_text = if let Some(text) = matches.get_one::<String>("text") {
137        text.clone()
138    } else if let Some(file_path) = matches.get_one::<String>("file") {
139        std::fs::read_to_string(file_path)?
140    } else if !io::stdin().is_terminal() {
141        // Read from stdin if available (pipe input)
142        let mut buffer = String::new();
143        io::stdin().read_to_string(&mut buffer)?;
144        buffer.trim().to_string()
145    } else {
146        return Err("Either text argument, --text file, or pipe input must be specified".into());
147    };
148
149    let presets_map = get_presets_map(config);
150
151    let (narrator, emotion, preset_pitch) =
152        if let Some(preset_name) = matches.get_one::<String>("preset") {
153            // Explicit preset specified via -p option
154            let preset = presets_map
155                .get(preset_name)
156                .ok_or_else(|| format!("Unknown preset: {}", preset_name))?;
157            (
158                preset.narrator.clone(),
159                preset.get_emotion_string(),
160                preset.pitch,
161            )
162        } else if let Some(default_preset_name) = &config.default_preset {
163            // No preset specified, but default_preset exists in config
164            if let Some(default_preset) = presets_map.get(default_preset_name) {
165                // Use default preset, but allow individual overrides
166                let narrator = matches
167                    .get_one::<String>("narrator")
168                    .cloned()
169                    .unwrap_or_else(|| default_preset.narrator.clone());
170                let emotion = matches
171                    .get_one::<String>("emotion")
172                    .cloned()
173                    .unwrap_or_else(|| default_preset.get_emotion_string());
174                let preset_pitch = if matches.get_one::<String>("emotion").is_some() {
175                    None // If emotion is overridden, don't use preset pitch
176                } else {
177                    default_preset.pitch
178                };
179                (narrator, emotion, preset_pitch)
180            } else {
181                // Default preset not found, fallback to manual settings
182                let narrator = matches
183                    .get_one::<String>("narrator")
184                    .cloned()
185                    .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
186                let emotion = matches
187                    .get_one::<String>("emotion")
188                    .cloned()
189                    .unwrap_or_default();
190                (narrator, emotion, None)
191            }
192        } else {
193            // No preset and no default_preset, use manual settings only
194            let narrator = matches
195                .get_one::<String>("narrator")
196                .cloned()
197                .ok_or("No narrator specified. Use --narrator option or configure a preset.")?;
198            let emotion = matches
199                .get_one::<String>("emotion")
200                .cloned()
201                .unwrap_or_default();
202            (narrator, emotion, None)
203        };
204
205    let speed = matches.get_one::<String>("speed");
206    let pitch = matches
207        .get_one::<String>("pitch")
208        .cloned()
209        .or_else(|| preset_pitch.map(|p| p.to_string()));
210    let should_play = matches.get_one::<String>("out").is_none();
211    let output_path = matches.get_one::<String>("out").map(PathBuf::from);
212    let strict_length = matches.get_flag("strict-length");
213    let playback_mode = matches.get_one::<String>("playback-mode").unwrap();
214    let verbose = matches.get_flag("verbose");
215
216    if strict_length && !check_text_length(&input_text) {
217        return Err(format!(
218            "Input text is too long ({} characters). Maximum allowed is {} characters.\nUse without --strict-length to enable automatic splitting.",
219            input_text.chars().count(),
220            MAX_CHARS
221        ).into());
222    }
223
224    let text_chunks = split_text(&input_text);
225
226    if text_chunks.len() > 1 {
227        println!(
228            "Text is too long, splitting into {} parts...",
229            text_chunks.len()
230        );
231    }
232
233    // Check ffmpeg availability for batch mode
234    if (playback_mode == "batch" || (!should_play && text_chunks.len() > 1))
235        && !check_ffmpeg_available()
236    {
237        return Err(
238            "ffmpeg is required for batch mode and multi-chunk file output.\n\
239            Please install ffmpeg or use --playback-mode sequential for auto-play mode.\n\
240            Install ffmpeg: https://ffmpeg.org/download.html"
241                .into(),
242        );
243    }
244
245    if should_play {
246        // Auto-play mode
247        if playback_mode == "sequential" {
248            // Sequential mode: generate and play one by one
249            for (i, chunk) in text_chunks.iter().enumerate() {
250                if text_chunks.len() > 1 {
251                    println!("Playing part {}/{}", i + 1, text_chunks.len());
252                }
253
254                let temp_path = create_temp_audio_file()?;
255
256                let mut cmd = VoicepeakCommand::new()
257                    .text(chunk)
258                    .narrator(&narrator)
259                    .emotion(&emotion)
260                    .output(&temp_path);
261
262                if let Some(speed) = speed {
263                    cmd = cmd.speed(speed);
264                }
265                if let Some(pitch) = &pitch {
266                    cmd = cmd.pitch(pitch);
267                }
268
269                cmd.execute_with_verbose(verbose)?;
270                play_audio_and_cleanup(&temp_path)?;
271            }
272        } else {
273            // Batch mode: generate all, merge, then play
274            let mut temp_files = Vec::new();
275
276            for (i, chunk) in text_chunks.iter().enumerate() {
277                if text_chunks.len() > 1 {
278                    println!("Generating part {}/{}", i + 1, text_chunks.len());
279                }
280
281                let temp_path = create_temp_audio_file()?;
282
283                let mut cmd = VoicepeakCommand::new()
284                    .text(chunk)
285                    .narrator(&narrator)
286                    .emotion(&emotion)
287                    .output(&temp_path);
288
289                if let Some(speed) = speed {
290                    cmd = cmd.speed(speed);
291                }
292                if let Some(pitch) = &pitch {
293                    cmd = cmd.pitch(pitch);
294                }
295
296                cmd.execute_with_verbose(verbose)?;
297                temp_files.push(temp_path);
298            }
299
300            // Merge and play
301            let final_temp = create_temp_audio_file()?;
302            let temp_paths: Vec<&std::path::Path> =
303                temp_files.iter().map(|p| p.as_path()).collect();
304
305            if text_chunks.len() > 1 {
306                println!("Merging audio files...");
307                merge_audio_files(&temp_paths, &final_temp)?;
308                println!("Merge complete. Playing audio...");
309            } else {
310                merge_audio_files(&temp_paths, &final_temp)?;
311            }
312
313            // Cleanup individual temp files
314            for temp_file in temp_files {
315                let _ = std::fs::remove_file(temp_file);
316            }
317
318            play_audio_and_cleanup(&final_temp)?;
319        }
320    } else if let Some(output_path) = output_path {
321        // File output mode
322        let mut temp_files = Vec::new();
323
324        for (i, chunk) in text_chunks.iter().enumerate() {
325            if text_chunks.len() > 1 {
326                println!("Generating part {}/{}", i + 1, text_chunks.len());
327            }
328
329            let temp_path = create_temp_audio_file()?;
330
331            let mut cmd = VoicepeakCommand::new()
332                .text(chunk)
333                .narrator(&narrator)
334                .emotion(&emotion)
335                .output(&temp_path);
336
337            if let Some(speed) = speed {
338                cmd = cmd.speed(speed);
339            }
340            if let Some(pitch) = &pitch {
341                cmd = cmd.pitch(pitch);
342            }
343
344            cmd.execute_with_verbose(verbose)?;
345            temp_files.push(temp_path);
346        }
347
348        // Merge to final output
349        let temp_paths: Vec<&std::path::Path> = temp_files.iter().map(|p| p.as_path()).collect();
350
351        if text_chunks.len() > 1 {
352            println!("Merging audio files...");
353            merge_audio_files(&temp_paths, &output_path)?;
354            println!("Merge complete.");
355        } else {
356            merge_audio_files(&temp_paths, &output_path)?;
357        }
358
359        // Cleanup temp files
360        for temp_file in temp_files {
361            let _ = std::fs::remove_file(temp_file);
362        }
363
364        println!("Audio saved to: {}", output_path.display());
365    }
366
367    Ok(())
368}