use crate::util::get_user_home_path;
use clap::Parser;
use cpal::Device;
use cpal::traits::DeviceTrait;
// Command-line options for the application, parsed with clap's derive API.
// Options that carry an `env = "..."` attribute can also be supplied through
// that environment variable.
// NOTE: plain `//` comments are used here on purpose: clap's derive macro turns
// `///` doc comments into user-visible help text, which would change `--help`.
#[derive(Parser, Debug, Clone)]
#[command(
// Author string is read from Cargo package metadata at compile time.
author = env!("CARGO_PKG_AUTHORS"),
version,
// Long description: two newlines, the package description, then the homepage URL.
long_about = concat!(
"\n\n",
env!("CARGO_PKG_DESCRIPTION"),
"\n\nHomepage: ",
env!("CARGO_PKG_HOMEPAGE")
)
)]
pub struct Args {
// `--verbose`: boolean flag, true when present on the command line.
#[arg(long, action = clap::ArgAction::SetTrue)]
pub verbose: bool,
// `--llm`: restricted to "ollama" or "llama-server"; defaults to "ollama".
// Unlike `--tts`, this option has no environment-variable fallback.
#[arg(
long,
default_value = "ollama",
value_parser = clap::builder::PossibleValuesParser::new(&["ollama", "llama-server"]),
)]
pub llm: String,
// `--tts` / env `TTS`: restricted to "kokoro" or "opentts"; defaults to "kokoro".
#[arg(
long,
default_value = "kokoro",
env = "TTS",
value_parser = clap::builder::PossibleValuesParser::new(&["kokoro", "opentts"])
)]
pub tts: String,
// `--whisper-model-path` / env `WHISPER_MODEL_PATH`: defaults to the
// `WHISPER_MODEL_PATH` constant. The raw value may start with `~`; use
// `resolved_whisper_model_path()` to get the expanded form.
#[arg(long, default_value = WHISPER_MODEL_PATH, env = "WHISPER_MODEL_PATH")]
pub whisper_model_path: String,
// `--language` / env `LANGUAGE`: defaults to "en".
#[arg(long, default_value = "en", env = "LANGUAGE")]
pub language: String,
// `--voice` / env `VOICE`: optional; `None` when neither is supplied.
#[arg(long, env = "VOICE")]
pub voice: Option<String>,
// `--sound-threshold-peak` / env `SOUND_THRESHOLD_PEAK`: defaults to
// `SOUND_THRESHOLD_PEAK_DEFAULT`.
#[arg(long, default_value_t = SOUND_THRESHOLD_PEAK_DEFAULT, env = "SOUND_THRESHOLD_PEAK")]
pub sound_threshold_peak: f32,
// `--end-silence-ms` / env `END_SILENCE_MS`: defaults to `END_SILENCE_MS_DEFAULT`.
#[arg(long, default_value_t = END_SILENCE_MS_DEFAULT, env = "END_SILENCE_MS")]
pub end_silence_ms: u64,
// `--ollama-url` / env `OLLAMA_URL`: defaults to `OLLAMA_URL_DEFAULT`.
#[arg(long, default_value = OLLAMA_URL_DEFAULT, env = "OLLAMA_URL")]
pub ollama_url: String,
// `--model` / env `MODEL`: defaults to `MODEL_DEFAULT`.
#[arg(long, default_value = MODEL_DEFAULT, env = "MODEL")]
pub model: String,
// `--llama-server-url` / env `LLAMA_SERVER_URL`: defaults to `LLAMASERVER_URL_DEFAULT`.
#[arg(
long,
default_value = LLAMASERVER_URL_DEFAULT,
env = "LLAMA_SERVER_URL"
)]
pub llama_server_url: String,
// `--opentts-base-url` / env `OPENTTS_BASE_URL`: defaults to `OPENTTS_BASE_URL_DEFAULT`.
#[arg(long, default_value = OPENTTS_BASE_URL_DEFAULT, env = "OPENTTS_BASE_URL")]
pub opentts_base_url: String,
// `--list-voices`: boolean flag, true when present on the command line.
#[arg(long, action=clap::ArgAction::SetTrue)]
pub list_voices: bool,
// `--ptt`: boolean flag, true when present.
// NOTE(review): presumably "push-to-talk" — confirm against the code that reads it.
#[arg(long, action=clap::ArgAction::SetTrue)]
pub ptt: bool,
}
// Default for `Args::sound_threshold_peak` (`--sound-threshold-peak`).
const SOUND_THRESHOLD_PEAK_DEFAULT: f32 = 0.10;
// Not referenced in this part of the file; public, so presumably consumed elsewhere.
// NOTE(review): the name suggests a duration in milliseconds — confirm at the use site.
pub const HANGOVER_MS_DEFAULT: u64 = 300;
// Default for `Args::end_silence_ms` (`--end-silence-ms`).
const END_SILENCE_MS_DEFAULT: u64 = 1200;
// Not referenced in this part of the file; public, so presumably consumed elsewhere.
// NOTE(review): the name suggests a duration in milliseconds — confirm at the use site.
pub const MIN_UTTERANCE_MS_DEFAULT: u64 = 300;
// Default for `Args::ollama_url` (`--ollama-url`).
pub const OLLAMA_URL_DEFAULT: &str = "http://localhost:11434";
// Default for `Args::model` (`--model`).
pub const MODEL_DEFAULT: &str = "llama3.2:3b";
// Default for `Args::llama_server_url` (`--llama-server-url`).
pub const LLAMASERVER_URL_DEFAULT: &str = "http://127.0.0.1:8080";
// Default for `Args::whisper_model_path`. Starts with `~`, which is expanded by
// `Args::resolved_whisper_model_path()` rather than by the shell.
pub const WHISPER_MODEL_PATH: &str = "~/.whisper-models/ggml-tiny.bin";
// Default for `Args::opentts_base_url` (`--opentts-base-url`), including a preset query string.
// NOTE(review): the query contains `?&` and `&&` (empty parameters) — verify this is intentional.
const OPENTTS_BASE_URL_DEFAULT: &str = "http://127.0.0.1:5500/api/tts?&vocoder=high&denoiserStrength=0.005&&speakerId=&ssml=false&ssmlNumbers=true&ssmlDates=true&ssmlCurrency=true&cache=false";
impl Args {
    /// Returns the Whisper model path with a leading `~` expanded to the user's home directory.
    ///
    /// Only the forms `~` and `~<sep>rest` are expanded, where `<sep>` is a path separator
    /// on the current platform. The configured value is returned unchanged when:
    ///
    /// * it does not start with `~`,
    /// * it starts with `~` followed by anything other than a separator (for example
    ///   `~name/...`), because such a path does not refer to the current user's home, or
    /// * `get_user_home_path()` yields `None`.
    ///
    /// This method never panics: a bare `~` resolves to the home directory itself, and no
    /// byte-offset slicing is performed on the configured string.
    pub fn resolved_whisper_model_path(&self) -> String {
        let raw = self.whisper_model_path.as_str();

        // Accept only `~` on its own or `~` directly followed by a path separator.
        let after_tilde = match raw.strip_prefix('~') {
            Some(rest) if rest.is_empty() || rest.starts_with(std::path::is_separator) => rest,
            _ => return raw.to_owned(),
        };

        match get_user_home_path() {
            Some(mut resolved) => {
                // Drop every leading separator: pushing an absolute component would
                // replace the home directory instead of extending it.
                let relative = after_tilde.trim_start_matches(std::path::is_separator);
                if !relative.is_empty() {
                    resolved.push(relative);
                }
                resolved.to_string_lossy().into_owned()
            }
            None => raw.to_owned(),
        }
    }
}
/// Selects the input stream configuration of `device` that best matches `preferred_sr`.
///
/// Each supported configuration range is pinned to the sample rate inside the range that
/// is closest to `preferred_sr`. The resulting candidates are then ranked, lowest first, by:
///
/// 1. sample format: `F32`, then `I16`, then `U16`, then anything else;
/// 2. channel count: mono, then stereo, then anything else;
/// 3. absolute distance between the candidate's sample rate and `preferred_sr`.
///
/// When several candidates rank equally, the one reported first by the device wins.
///
/// # Errors
///
/// Propagates the error from `supported_input_configs()`, and returns
/// `"no supported input configs"` when the device reports no configuration at all.
pub fn pick_input_config(
    device: &Device,
    preferred_sr: u32,
) -> Result<cpal::SupportedStreamConfig, Box<dyn std::error::Error + Send + Sync>> {
    use cpal::SampleFormat;

    let best = device
        .supported_input_configs()?
        .map(|range| {
            // Snap the requested rate into what this range can actually deliver.
            let lowest = range.min_sample_rate().0;
            let highest = range.max_sample_rate().0;
            let rate = preferred_sr.clamp(lowest, highest);
            range.with_sample_rate(cpal::SampleRate(rate))
        })
        // `min_by_key` returns the first of several equal minima, like a stable sort + first.
        .min_by_key(|candidate| {
            let format_rank = match candidate.sample_format() {
                SampleFormat::F32 => 0,
                SampleFormat::I16 => 1,
                SampleFormat::U16 => 2,
                _ => 9,
            };
            let channel_rank = match candidate.channels() {
                1 => 0,
                2 => 1,
                _ => 5,
            };
            let rate_distance = candidate.sample_rate().0.abs_diff(preferred_sr);
            (format_rank, channel_rank, rate_distance)
        });

    best.ok_or_else(|| "no supported input configs".into())
}