use std::ffi::OsStr;
use std::io::{BufRead, BufReader, Read, Write};
use std::os::unix::net::UnixStream;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Duration;
use anyhow::{Context, Result};
use candle_core::Device;
use clap::{Parser, Subcommand, ValueEnum};
use qwen3_tts::{auto_device, hub::ModelPaths, AudioBuffer, Qwen3TTS};
use speakers_core::config::{parse_voice_binding, Config};
use speakers_core::lang;
use speakers_core::model::{ModelVariant, BASE_MODEL_ID};
use speakers_core::paths;
use speakers_core::profile::{self, ProfileMode};
use speakers_core::protocol::{DaemonRequest, DaemonResponse, ResponseData, VoiceSelection};
use tempfile::NamedTempFile;
// Root of the command-line interface; clap derives the argument parser from this struct.
// Plain `//` comments are used here on purpose: clap turns `///` doc comments into help text.
#[derive(Debug, Parser)]
#[command(
name = "speakers",
about = "Speech Dispatcher bridge and local Qwen3 TTS control"
)]
struct Cli {
// The selected top-level subcommand; `main` dispatches on it.
#[command(subcommand)]
command: TopCommand,
}
// Top-level subcommands. Each variant is routed by `main` to a `run_*` handler.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.
#[derive(Debug, Subcommand)]
enum TopCommand {
// Daemon lifecycle control (start / stop / status).
Daemon {
#[command(subcommand)]
command: DaemonCommand,
},
// Synthesize text to a wav file via the daemon; text is read from stdin when omitted.
Speak {
text: Option<String>,
// Preset voice name; mutually exclusive with `--profile`.
#[arg(long, conflicts_with = "profile")]
voice: Option<String>,
// Profile voice name; mutually exclusive with `--voice`.
#[arg(long, conflicts_with = "voice")]
profile: Option<String>,
#[arg(long, default_value = lang::DEFAULT_LANGUAGE)]
lang: String,
#[arg(short, long, default_value = "/tmp/speakers-output.wav")]
output: PathBuf,
},
// Capture the current desktop selection/clipboard text and synthesize it.
SpeakSelection {
// Which selection to read; `Auto` tries the primary selection before the clipboard.
#[arg(long, value_enum, default_value_t = SelectionSource::Auto)]
source: SelectionSource,
#[arg(long, conflicts_with = "profile")]
voice: Option<String>,
#[arg(long, conflicts_with = "voice")]
profile: Option<String>,
// Falls back to the configured Speech Dispatcher default language when absent.
#[arg(long)]
lang: Option<String>,
// Overrides the configured playback command.
#[arg(long)]
playback_command: Option<String>,
// Skip playback; the synthesized wav is preserved instead.
#[arg(long)]
no_playback: bool,
},
// Voice-clone profile management.
Clone {
#[command(subcommand)]
command: CloneCommand,
},
// Synthesis entry point used for Speech Dispatcher requests; text is read from stdin when omitted.
SpdSynth {
text: Option<String>,
#[arg(long)]
language: Option<String>,
// Symbolic voice name resolved through the configured voice map.
#[arg(long)]
voice: Option<String>,
// Explicit profile voice; mutually exclusive with `--preset`.
#[arg(long, conflicts_with = "preset")]
profile: Option<String>,
// Explicit preset voice; mutually exclusive with `--profile`.
#[arg(long, conflicts_with = "profile")]
preset: Option<String>,
// Rate and pitch arrive as strings and are parsed/clamped to -100..=100 by `parse_spd_scalar`.
#[arg(long)]
rate: Option<String>,
#[arg(long)]
pitch: Option<String>,
// Explicit output path; a temporary file is used when absent.
#[arg(long)]
output: Option<PathBuf>,
#[arg(long)]
playback_command: Option<String>,
#[arg(long)]
no_playback: bool,
},
// Print a diagnostic report about configuration, daemon, and Speech Dispatcher setup.
Doctor,
}
// Subcommands of `speakers daemon`, handled by `run_daemon_command`.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.
#[derive(Debug, Subcommand)]
enum DaemonCommand {
// Launch the daemon binary, either detached or in the foreground.
Start {
// Optional model variant name, forwarded to the daemon as `--model`.
#[arg(long)]
model: Option<String>,
// Run the daemon attached to this process and wait for it to exit.
#[arg(long)]
foreground: bool,
},
// Send a shutdown request to the running daemon.
Stop,
// Query the daemon health endpoint and print its details.
Status,
}
// Subcommands of `speakers clone`, handled by `run_clone_command`.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.
#[derive(Debug, Subcommand)]
enum CloneCommand {
// Build a voice-clone prompt from reference audio and save it as a named profile.
Create {
#[arg(long)]
name: String,
// Path to the reference audio file to clone from.
#[arg(long)]
ref_audio: PathBuf,
// Optional text passed alongside the reference audio when creating the prompt.
#[arg(long)]
ref_text: Option<String>,
},
// Print the names of all saved profiles, one per line.
List,
// Print the metadata of one profile as pretty-printed JSON.
Show {
#[arg(long)]
name: String,
},
}
// User-facing choice of which desktop selection `speak-selection` reads from.
// Plain `//` comments are used on purpose: clap's `ValueEnum` derive turns `///` into help text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
enum SelectionSource {
// Try the primary selection first, then the clipboard (see `selection_commands`).
Auto,
// Only the primary selection.
Primary,
// Only the clipboard.
Clipboard,
}
/// Display server family used to decide which selection helper programs to try.
///
/// Chosen by `detect_selection_backend` from the `WAYLAND_DISPLAY` / `DISPLAY` variables.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SelectionBackend {
/// Selection is read with `wl-paste`.
Wayland,
/// Selection is read with `xclip` or `xsel`.
X11,
}
/// The concrete selection buffer a single helper invocation reads.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SelectionTarget {
    /// The primary selection.
    Primary,
    /// The regular clipboard.
    Clipboard,
}

impl SelectionTarget {
    /// Returns the human-readable label used when reporting capture failures.
    fn description(self) -> &'static str {
        let label = match self {
            SelectionTarget::Clipboard => "clipboard",
            SelectionTarget::Primary => "primary selection",
        };
        label
    }
}
/// One helper-program invocation that prints a selection buffer to stdout.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SelectionCommand {
    /// Executable name, looked up on `PATH`.
    program: &'static str,
    /// Arguments passed to `program`.
    args: &'static [&'static str],
    /// Which selection buffer this invocation reads.
    target: SelectionTarget,
}

impl SelectionCommand {
    /// Builds a command description; `const` so it can be used in constant tables.
    const fn new(
        program: &'static str,
        args: &'static [&'static str],
        target: SelectionTarget,
    ) -> Self {
        SelectionCommand {
            program,
            args,
            target,
        }
    }
}
/// Destination of a synthesized wav file.
enum SynthOutput {
    /// A path chosen by the caller.
    Explicit(PathBuf),
    /// A temporary file that is removed on drop unless explicitly preserved.
    Temporary(NamedTempFile),
}

impl SynthOutput {
    /// Filesystem path the daemon should write the wav to.
    fn path(&self) -> &Path {
        match self {
            Self::Temporary(file) => file.path(),
            Self::Explicit(path) => path.as_path(),
        }
    }

    /// Whether the output must survive regardless of playback (true for explicit paths only).
    fn should_keep(&self) -> bool {
        match self {
            Self::Explicit(_) => true,
            Self::Temporary(_) => false,
        }
    }

    /// Consumes the output and returns the path that remains on disk, if any.
    ///
    /// Explicit paths are always returned. A temporary file is persisted and returned
    /// when `keep_output` is true; otherwise it is dropped and `None` is returned.
    ///
    /// # Errors
    /// Fails when the temporary file cannot be persisted.
    fn finalize(self, keep_output: bool) -> Result<Option<PathBuf>> {
        let file = match self {
            Self::Explicit(path) => return Ok(Some(path)),
            Self::Temporary(file) => file,
        };
        if !keep_output {
            // `file` goes out of scope here, which removes the temporary file.
            return Ok(None);
        }
        let kept = file
            .into_temp_path()
            .keep()
            .context("failed to preserve synthesized wav")?;
        Ok(Some(kept))
    }
}
fn main() -> Result<()> {
let cli = Cli::parse();
match cli.command {
TopCommand::Daemon { command } => run_daemon_command(command),
TopCommand::Speak {
text,
voice,
profile,
lang,
output,
} => run_speak(text, voice, profile, lang, output),
TopCommand::SpeakSelection {
source,
voice,
profile,
lang,
playback_command,
no_playback,
} => run_speak_selection(source, voice, profile, lang, playback_command, no_playback),
TopCommand::Clone { command } => run_clone_command(command),
TopCommand::SpdSynth {
text,
language,
voice,
profile,
preset,
rate,
pitch,
output,
playback_command,
no_playback,
} => run_spd_synth(
text,
language,
voice,
profile,
preset,
rate,
pitch,
output,
playback_command,
no_playback,
),
TopCommand::Doctor => run_doctor(),
}
}
fn run_daemon_command(command: DaemonCommand) -> Result<()> {
let config = Config::load_or_create()?;
match command {
DaemonCommand::Start { model, foreground } => {
let parsed_model = parse_model_arg(model.as_deref())?;
let daemon_bin = daemon_binary_path();
if foreground {
let mut cmd = Command::new(&daemon_bin);
if let Some(variant) = parsed_model {
cmd.arg("--model").arg(variant.as_str());
}
let status = cmd.status().with_context(|| {
format!("failed to start daemon binary: {}", daemon_bin.display())
})?;
anyhow::ensure!(status.success(), "daemon exited with status {status}");
return Ok(());
}
if daemon_is_alive(&config).unwrap_or(false) {
println!("daemon already running");
return Ok(());
}
let mut cmd = Command::new(&daemon_bin);
if let Some(variant) = parsed_model {
cmd.arg("--model").arg(variant.as_str());
}
cmd.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null());
cmd.spawn().with_context(|| {
format!("failed to spawn daemon binary: {}", daemon_bin.display())
})?;
std::thread::sleep(Duration::from_millis(400));
if daemon_is_alive(&config).unwrap_or(false) {
println!("daemon started");
return Ok(());
}
anyhow::bail!(
"daemon process was spawned but health check failed; run `speakers daemon start --foreground` to inspect logs"
)
}
DaemonCommand::Stop => {
let response = send_request(&config, &DaemonRequest::Shutdown)
.context("failed to send shutdown request")?;
ensure_ok(response)?;
println!("daemon stop request sent");
Ok(())
}
DaemonCommand::Status => {
let response =
send_request(&config, &DaemonRequest::Health).context("daemon is not reachable")?;
let data = ensure_ok(response)?;
match data {
Some(ResponseData::Health(health)) => {
println!("status: running");
println!("pid: {}", health.pid);
println!("model: {}", health.model);
println!("device: {}", health.device);
println!("socket: {}", health.socket.display());
println!("uptime_secs: {}", health.uptime_secs);
}
_ => println!("status: running (unexpected health payload)"),
}
Ok(())
}
}
}
/// Handles `speakers speak`: synthesizes `text` (or stdin) to `output` via the daemon.
///
/// Blank input is a silent no-op. With neither `--voice` nor `--profile`, the default
/// preset voice is used.
///
/// # Errors
/// Fails when the config or input cannot be read, or when the daemon is unreachable or
/// reports an error.
fn run_speak(
    text: Option<String>,
    voice: Option<String>,
    profile_name: Option<String>,
    language: String,
    output: PathBuf,
) -> Result<()> {
    let config = Config::load_or_create()?;
    let text = read_text(text)?;
    if text.trim().is_empty() {
        return Ok(());
    }

    let voice = match (voice, profile_name) {
        (Some(_), Some(_)) => anyhow::bail!("--voice and --profile are mutually exclusive"),
        (Some(preset), None) => VoiceSelection::preset(preset),
        (None, Some(profile)) => VoiceSelection::profile(profile),
        (None, None) => VoiceSelection::preset(lang::DEFAULT_PRESET_VOICE),
    };

    let reply = send_request(
        &config,
        &DaemonRequest::Synthesize {
            text,
            language,
            output: output.clone(),
            voice,
            rate: None,
            pitch: None,
        },
    )
    .context("failed to communicate with daemon")?;
    ensure_ok(reply)?;

    println!("wrote {}", output.display());
    Ok(())
}
/// Handles `speakers speak-selection`: reads the desktop selection and speaks it.
///
/// The language falls back to the configured Speech Dispatcher default, and the voice is
/// resolved through `resolve_selection_voice`. Output always goes to a temporary file.
///
/// # Errors
/// Fails when the selection cannot be captured or the synthesis/playback step fails.
fn run_speak_selection(
    source: SelectionSource,
    voice: Option<String>,
    profile_name: Option<String>,
    language: Option<String>,
    playback_command: Option<String>,
    no_playback: bool,
) -> Result<()> {
    let config = Config::load_or_create()?;
    let selected_text = read_selected_text(source)?;
    let resolved_language = normalize_spd_language(
        language.as_deref(),
        &config.speech_dispatcher.default_language,
    );
    let resolved_voice = resolve_selection_voice(&config, voice, profile_name)?;

    // No explicit output path: the synth step allocates a temporary wav.
    run_single_synth_request(
        &config,
        selected_text,
        resolved_language,
        resolved_voice,
        None,
        playback_command,
        no_playback,
    )
}
/// Sends one synthesis request to the daemon, optionally plays the result, and cleans up.
///
/// When `output` is `None` a temporary wav is used. The file is kept (and its path
/// printed) when the path was explicit or when playback is disabled; otherwise the
/// temporary file is discarded after playback.
///
/// # Errors
/// Fails when the daemon request, playback, or preserving the temporary file fails.
fn run_single_synth_request(
    config: &Config,
    text: String,
    language: String,
    voice: VoiceSelection,
    output: Option<PathBuf>,
    playback_command: Option<String>,
    no_playback: bool,
) -> Result<()> {
    let target = prepare_synth_output(output)?;

    let reply = send_request(
        config,
        &DaemonRequest::Synthesize {
            text,
            language,
            output: target.path().to_path_buf(),
            voice,
            rate: None,
            pitch: None,
        },
    )
    .context("failed to communicate with daemon")?;
    ensure_ok(reply)?;

    if !no_playback {
        let player = resolve_playback_command(
            playback_command.as_deref(),
            config.speech_dispatcher.playback_command.as_deref(),
        )?;
        run_playback(&player, target.path())?;
    }

    // Without playback the wav is the only product, so it must be preserved.
    let keep = no_playback || target.should_keep();
    if let Some(written) = target.finalize(keep)? {
        println!("wrote {}", written.display());
    }
    Ok(())
}
/// Handles `speakers clone {create,list,show}`.
///
/// * `Create` loads the reference audio, initializes the base model locally, builds a
///   voice-clone prompt, and saves it as profile `name`.
/// * `List` prints every saved profile name on its own line.
/// * `Show` prints one profile's metadata as pretty JSON.
///
/// # Errors
/// Propagates audio loading, model download/initialization, prompt creation, profile
/// storage, and JSON serialization failures.
fn run_clone_command(command: CloneCommand) -> Result<()> {
    match command {
        CloneCommand::List => {
            profile::list_profiles()?
                .into_iter()
                .for_each(|name| println!("{name}"));
            Ok(())
        }
        CloneCommand::Show { name } => {
            let meta = profile::read_profile_meta(&name)?;
            let rendered = serde_json::to_string_pretty(&meta)?;
            println!("{rendered}");
            Ok(())
        }
        CloneCommand::Create {
            name,
            ref_audio,
            ref_text,
        } => {
            let reference = AudioBuffer::load(&ref_audio).with_context(|| {
                format!("failed to load reference audio: {}", ref_audio.display())
            })?;
            let device = choose_device_for_local_tasks();
            let model_paths = ModelPaths::download(Some(BASE_MODEL_ID))
                .context("failed to download base model for cloning")?;
            let tts = Qwen3TTS::from_paths(&model_paths, device)
                .context("failed to initialize base model for cloning")?;
            let clone_prompt = tts
                .create_voice_clone_prompt(&reference, ref_text.as_deref())
                .context("failed to create voice clone prompt")?;
            profile::save_profile(&name, &clone_prompt)?;
            println!("saved profile {name}");
            Ok(())
        }
    }
}
/// Handles `speakers spd-synth`, the synthesis path used for Speech Dispatcher requests.
///
/// Steps, in order:
/// 1. Read the text (argument or stdin), strip markup tags, and return early when blank.
/// 2. Normalize language, rate, and pitch.
/// 3. Pick the primary voice: `--profile`, else `--preset`, else the configured mapping
///    for the symbolic `--voice`.
/// 4. Try the primary voice and then the configured fallback voice (if different) until
///    one synthesis request succeeds.
/// 5. Optionally play the wav, and print the path when an explicit output was given.
///
/// # Errors
/// Fails when input/config cannot be read, when a profile voice is blocked by the ICL
/// opt-in or by the daemon's model (these abort immediately rather than falling back),
/// when every attempt fails, or when playback fails.
// One parameter per CLI flag of the `SpdSynth` subcommand, hence the argument count.
#[allow(clippy::too_many_arguments)]
fn run_spd_synth(
text: Option<String>,
language: Option<String>,
symbolic_voice: Option<String>,
profile_name: Option<String>,
preset_name: Option<String>,
rate: Option<String>,
pitch: Option<String>,
output: Option<PathBuf>,
playback_command: Option<String>,
no_playback: bool,
) -> Result<()> {
let config = Config::load_or_create()?;
let text = sanitize_spd_text(&read_text(text)?);
// Nothing to say: succeed without contacting the daemon.
if text.trim().is_empty() {
return Ok(());
}
let language = normalize_spd_language(
language.as_deref(),
&config.speech_dispatcher.default_language,
);
let rate = parse_spd_scalar(rate.as_deref(), "rate")?;
let pitch = parse_spd_scalar(pitch.as_deref(), "pitch")?;
// Explicit --profile wins over --preset, which wins over the symbolic voice mapping.
let primary_voice = if let Some(name) = profile_name {
VoiceSelection::profile(name)
} else if let Some(name) = preset_name {
VoiceSelection::preset(name)
} else {
config
.speech_dispatcher
.resolve_voice_selection(symbolic_voice.as_deref())?
};
// Without --output, synthesize into a temp file; the guard keeps it alive until the end
// of this function, and `keep_output` is false so its path is not reported.
let (output_path, temp_file_guard, keep_output) = match output {
Some(path) => (path, None, true),
None => {
let mut temp = NamedTempFile::new_in(std::env::temp_dir())
.context("failed to allocate temp wav file")?;
let path = temp.path().to_path_buf();
temp.as_file_mut()
.flush()
.context("failed to initialize temp wav file")?;
(path, Some(temp), false)
}
};
// Ordered list of (voice, label) candidates: the requested voice, then the fallback.
let mut attempts = Vec::new();
attempts.push((primary_voice.clone(), "requested voice".to_string()));
if let Some(fallback) = config.speech_dispatcher.fallback_voice_selection() {
if fallback != primary_voice {
attempts.push((fallback, "fallback profile".to_string()));
}
}
// The daemon's model is queried at most once and reused across attempts.
let mut daemon_model_cache: Option<ModelVariant> = None;
let mut last_success: Option<VoiceSelection> = None;
let mut failures = Vec::new();
for (voice, source) in attempts {
// `Some(mode)` only for profile voices; unreadable profile metadata skips this
// candidate and moves on to the next one.
if let Some(mode) = match resolve_profile_mode_for_spd(&voice) {
Ok(mode) => mode,
Err(err) => {
failures.push(format!(
"{source} ({}) skipped: {err}",
describe_voice(&voice)
));
continue;
}
} {
let VoiceSelection::Profile { name } = &voice else {
unreachable!("profile mode is only available for profile voices")
};
// An ICL profile without opt-in aborts the whole request (no fallback).
if let Err(err) =
enforce_icl_opt_in_for_profile(name, mode, config.speech_dispatcher.allow_icl)
{
return Err(err).with_context(|| {
format!(
"{source} ({}) is not eligible for Speech Dispatcher synthesis",
describe_voice(&voice)
)
});
}
let daemon_model = match daemon_model_cache {
Some(model) => model,
None => {
let model = daemon_model_from_health(&config).with_context(|| {
format!(
"{source} ({}) requires daemon health check; ensure daemon is running and restart with `speakers daemon start --model base --foreground` for profile voices",
describe_voice(&voice)
)
})?;
daemon_model_cache = Some(model);
model
}
};
// A daemon running the wrong model for a profile voice also aborts immediately.
if let Err(err) = enforce_daemon_model_for_voice(&voice, daemon_model) {
return Err(err).with_context(|| {
format!(
"{source} ({}) is not eligible for Speech Dispatcher synthesis",
describe_voice(&voice)
)
});
}
}
let request = DaemonRequest::Synthesize {
text: text.clone(),
language: language.clone(),
output: output_path.clone(),
voice: voice.clone(),
rate,
pitch,
};
// Transport errors and daemon-reported errors are both recorded as a failed attempt.
match send_request(&config, &request)
.context("failed to send synth request to daemon")
.and_then(ensure_ok)
{
Ok(_) => {
last_success = Some(voice);
break;
}
Err(err) => {
failures.push(format!(
"{source} ({}) failed: {err}",
describe_voice(&voice)
));
}
}
}
// No attempt succeeded: report every recorded failure in one message.
let Some(_voice_used) = last_success else {
let detail = if failures.is_empty() {
"no valid voice candidates were available".to_string()
} else {
failures.join("; ")
};
anyhow::bail!("all synthesis attempts failed: {detail}");
};
if !no_playback {
let playback_command = resolve_playback_command(
playback_command.as_deref(),
config.speech_dispatcher.playback_command.as_deref(),
)?;
run_playback(&playback_command, &output_path)?;
}
if keep_output {
println!("wrote {}", output_path.display());
}
// Dropping the guard releases the temporary wav (if one was allocated).
drop(temp_file_guard);
Ok(())
}
/// Handles `speakers doctor`: prints a line-oriented diagnostic report to stdout.
///
/// The report covers config and socket paths, daemon reachability and model, timeouts,
/// ICL settings, presence of Speech Dispatcher binaries and user config files, voice-map
/// and fallback profile bindings (with warnings), and a final ICL readiness verdict.
///
/// # Errors
/// Fails only when the config cannot be loaded or an existing `speechd.conf` cannot be
/// read; an unreachable daemon is reported in the output rather than as an error.
fn run_doctor() -> Result<()> {
let config = Config::load_or_create()?;
let config_path = paths::existing_config_path().unwrap_or_else(paths::config_path);
let socket_path = config.daemon.resolved_socket_path();
println!("config: {}", config_path.display());
println!("socket: {}", socket_path.display());
println!("profiles: {}", paths::profiles_dir().display());
// Any transport failure, error response, or missing payload counts as "not running";
// an unexpected payload type still counts as running but with an unknown model.
let (daemon_status, daemon_model) = match send_request(&config, &DaemonRequest::Health)
.ok()
.and_then(|resp| {
if !resp.ok {
return None;
}
let data = resp.data?;
match data {
ResponseData::Health(health) => Some((String::from("running"), Some(health.model))),
_ => Some((String::from("running"), None)),
}
}) {
Some(values) => values,
None => (String::from("not running"), None),
};
println!("daemon: {daemon_status}");
println!(
"daemon_model: {}",
daemon_model
.map(|model| model.to_string())
.unwrap_or_else(|| "(unknown)".to_string())
);
println!("request_timeout_ms: {}", config.daemon.request_timeout_ms);
println!(
"synthesis_timeout_ms: {}",
config.daemon.synthesis_timeout_ms
);
println!("allow_icl: {}", config.speech_dispatcher.allow_icl);
// A blank fallback profile is reported the same way as an unset one.
println!(
"fallback_profile: {}",
config
.speech_dispatcher
.fallback_profile
.as_deref()
.filter(|s| !s.trim().is_empty())
.unwrap_or("(unset)")
);
println!(
"spd-say: {}",
if command_in_path("spd-say") {
"found"
} else {
"missing"
}
);
println!(
"speech-dispatcher: {}",
if command_in_path("speech-dispatcher") {
"found"
} else {
"missing"
}
);
// Per-user Speech Dispatcher configuration files checked for presence.
let spd_dir = paths::config_home().join("speech-dispatcher");
let user_module = spd_dir.join("modules/speakers-generic.conf");
let user_conf = spd_dir.join("speechd.conf");
println!(
"module config: {} ({})",
user_module.display(),
if user_module.exists() {
"present"
} else {
"missing"
}
);
println!(
"speechd.conf: {} ({})",
user_conf.display(),
if user_conf.exists() {
"present"
} else {
"missing"
}
);
if user_conf.exists() {
let body = std::fs::read_to_string(&user_conf)
.with_context(|| format!("failed to read {}", user_conf.display()))?;
// Only a line that is exactly `DefaultModule speakers` (case-insensitive, after
// trimming) counts as enabling this module by default.
let default_enabled = body
.lines()
.any(|line| line.trim().eq_ignore_ascii_case("DefaultModule speakers"));
println!(
"default module: {}",
if default_enabled {
"speakers"
} else {
"not set to speakers"
}
);
}
// Collect every binding that points at a profile, and the subset whose profile is ICL.
let mut mapped_profile_bindings = Vec::new();
let mut mapped_icl_bindings = Vec::new();
for (symbolic, binding) in &config.speech_dispatcher.voice_map {
let selection = match parse_voice_binding(binding) {
Ok(value) => value,
Err(err) => {
println!("voice-map warning: {symbolic} has invalid binding '{binding}': {err}");
continue;
}
};
if let VoiceSelection::Profile { name } = selection {
mapped_profile_bindings.push(format!("{symbolic}=profile:{name}"));
// ICL profiles are always recorded; a warning is added only when ICL is disabled.
match profile::read_profile_meta(&name) {
Ok(meta)
if meta.mode == ProfileMode::Icl && !config.speech_dispatcher.allow_icl =>
{
mapped_icl_bindings.push(format!("{symbolic}=profile:{name}"));
println!(
"voice-map warning: {symbolic} uses ICL profile '{name}' but allow_icl=false"
);
}
Ok(meta) => {
if meta.mode == ProfileMode::Icl {
mapped_icl_bindings.push(format!("{symbolic}=profile:{name}"));
}
}
Err(err) => {
println!(
"voice-map warning: {symbolic} references unreadable profile '{name}': {err}"
);
}
}
}
}
// The fallback voice gets the same treatment as a voice-map entry.
if let Some(fallback) = config.speech_dispatcher.fallback_voice_selection() {
if let VoiceSelection::Profile { name } = fallback {
mapped_profile_bindings.push(format!("fallback=profile:{name}"));
match profile::read_profile_meta(&name) {
Ok(meta)
if meta.mode == ProfileMode::Icl && !config.speech_dispatcher.allow_icl =>
{
mapped_icl_bindings.push(format!("fallback=profile:{name}"));
println!(
"fallback warning: fallback profile '{name}' is ICL but allow_icl=false"
);
}
Ok(meta) => {
if meta.mode == ProfileMode::Icl {
mapped_icl_bindings.push(format!("fallback=profile:{name}"));
}
}
Err(err) => {
println!("fallback warning: fallback profile '{name}' is not readable: {err}");
}
}
}
}
println!(
"profile_mappings: {}",
if mapped_profile_bindings.is_empty() {
"(none)".to_string()
} else {
mapped_profile_bindings.join(", ")
}
);
println!(
"icl_mappings: {}",
if mapped_icl_bindings.is_empty() {
"(none)".to_string()
} else {
mapped_icl_bindings.join(", ")
}
);
// Final verdict: "ready" only when no blocker applies; each blocker gets its own line.
let blockers = icl_readiness_blockers(
config.speech_dispatcher.allow_icl,
!mapped_profile_bindings.is_empty(),
!mapped_icl_bindings.is_empty(),
daemon_model,
);
println!(
"icl_readiness: {}",
if blockers.is_empty() {
"ready"
} else {
"blocked"
}
);
for blocker in blockers {
println!("icl_readiness_blocker: {blocker}");
}
Ok(())
}
/// Sends one JSON request to the daemon over its Unix socket and reads one JSON reply.
///
/// The request is serialized as a single newline-terminated JSON line; the reply is
/// expected to be a single JSON line. `config.daemon.request_timeout_ms` is applied as
/// both the read and the write timeout.
///
/// # Errors
/// Fails when the socket cannot be connected or configured, when writing fails, when the
/// read times out, when the daemon closes the connection without replying, or when the
/// reply is not valid JSON for `DaemonResponse`.
fn send_request(config: &Config, request: &DaemonRequest) -> Result<DaemonResponse> {
    let socket = config.daemon.resolved_socket_path();
    let timeout = Duration::from_millis(config.daemon.request_timeout_ms);
    let mut stream = UnixStream::connect(&socket)
        .with_context(|| format!("failed to connect to daemon socket: {}", socket.display()))?;
    stream
        .set_read_timeout(Some(timeout))
        .context("failed to set daemon read timeout")?;
    stream
        .set_write_timeout(Some(timeout))
        .context("failed to set daemon write timeout")?;

    let mut body = serde_json::to_string(request).context("failed to serialize daemon request")?;
    body.push('\n');
    stream
        .write_all(body.as_bytes())
        .context("failed to write daemon request")?;

    let mut line = String::new();
    let bytes_read = match BufReader::new(&stream).read_line(&mut line) {
        Ok(count) => count,
        // A socket read timeout surfaces as either kind depending on the platform.
        Err(err)
            if matches!(
                err.kind(),
                std::io::ErrorKind::TimedOut | std::io::ErrorKind::WouldBlock
            ) =>
        {
            anyhow::bail!(
                "daemon response timed out after {}ms",
                config.daemon.request_timeout_ms
            );
        }
        Err(err) => return Err(err).context("failed to read daemon response"),
    };
    // `read_line` returns Ok(0) at EOF: the daemon hung up before sending anything.
    // Report that directly instead of a misleading JSON parse error on an empty string.
    anyhow::ensure!(
        bytes_read > 0,
        "daemon closed the connection without sending a response"
    );
    serde_json::from_str::<DaemonResponse>(&line).context("failed to parse daemon response")
}
/// Converts a daemon response into its payload, or into an error for failed responses.
///
/// The error message has the form `code: message`, followed by `; cause: …` for every
/// entry in the response's error details. Missing code/message fields are replaced with
/// `unknown` and a generic message.
fn ensure_ok(response: DaemonResponse) -> Result<Option<ResponseData>> {
    if response.ok {
        return Ok(response.data);
    }

    let code = response.error_code.unwrap_or_else(|| "unknown".to_string());
    let message = response
        .error_message
        .unwrap_or_else(|| "daemon returned an unspecified error".to_string());

    let mut rendered = format!("{code}: {message}");
    for detail in response.error_details.unwrap_or_default() {
        rendered.push_str(&format!("; cause: {detail}"));
    }
    anyhow::bail!("{rendered}")
}
/// Looks up the profile mode for a profile voice; returns `None` for any other voice.
///
/// # Errors
/// Fails when the profile's metadata cannot be read.
fn resolve_profile_mode_for_spd(voice: &VoiceSelection) -> Result<Option<ProfileMode>> {
    match voice {
        VoiceSelection::Profile { name } => {
            let meta = profile::read_profile_meta(name)
                .with_context(|| format!("failed to read profile metadata for '{name}'"))?;
            Ok(Some(meta.mode))
        }
        _ => Ok(None),
    }
}
/// Rejects ICL profiles unless `allow_icl` is set; every other combination is accepted.
///
/// # Errors
/// Returns an `icl_not_allowed` error naming the profile when it is ICL and ICL is disabled.
fn enforce_icl_opt_in_for_profile(name: &str, mode: ProfileMode, allow_icl: bool) -> Result<()> {
    if allow_icl || mode != ProfileMode::Icl {
        return Ok(());
    }
    anyhow::bail!("icl_not_allowed: profile '{name}' is ICL and speech_dispatcher.allow_icl=false")
}
/// Checks that the daemon is running the model variant the given voice kind needs.
///
/// Preset voices need the custom-voice model; profile voices need the base model.
///
/// # Errors
/// On mismatch, returns an error naming the required and running models together with
/// the command to restart the daemon with the right one.
fn enforce_daemon_model_for_voice(
    voice: &VoiceSelection,
    daemon_model: ModelVariant,
) -> Result<()> {
    // Each voice kind maps to its label, required model, and the matching restart hint.
    let (voice_kind, required_model, remediation) = match voice {
        VoiceSelection::Preset { .. } => (
            "preset",
            ModelVariant::CustomVoice,
            "speakers daemon start --model custom-voice --foreground",
        ),
        VoiceSelection::Profile { .. } => (
            "profile",
            ModelVariant::Base,
            "speakers daemon start --model base --foreground",
        ),
    };
    if daemon_model != required_model {
        anyhow::bail!(
            "{voice_kind} voice request requires daemon model {required_model}, but daemon is running {daemon_model}; restart with `{remediation}`"
        );
    }
    Ok(())
}
/// Asks the daemon for its health and returns the model variant it reports.
///
/// # Errors
/// Fails when the daemon is unreachable, reports an error, or replies with a payload
/// that is not a health report.
fn daemon_model_from_health(config: &Config) -> Result<ModelVariant> {
    let reply = send_request(config, &DaemonRequest::Health)
        .context("failed to connect to daemon socket for health check")?;
    match ensure_ok(reply).context("daemon health request failed")? {
        Some(ResponseData::Health(health)) => Ok(health.model),
        _ => anyhow::bail!("daemon returned an unexpected health response payload"),
    }
}
/// Lists the reasons profile/ICL voices cannot currently be used; empty means ready.
///
/// Two independent checks, reported in this order:
/// 1. ICL profiles are mapped while `allow_icl` is false.
/// 2. Profile voices are mapped but the daemon is not running the base model, or its
///    model is unknown (`daemon_model` is `None`).
fn icl_readiness_blockers(
    allow_icl: bool,
    has_profile_mappings: bool,
    has_icl_mappings: bool,
    daemon_model: Option<ModelVariant>,
) -> Vec<String> {
    let icl_blocker = (has_icl_mappings && !allow_icl)
        .then(|| "allow_icl=false while one or more ICL profiles are mapped".to_string());

    let model_blocker = if !has_profile_mappings {
        None
    } else {
        match daemon_model {
            Some(ModelVariant::Base) => None,
            Some(model) => Some(format!(
                "profile voices are mapped but daemon model is {model}; required model is base"
            )),
            None => Some(
                "profile voices are mapped but daemon is not reachable to verify model".to_string(),
            ),
        }
    };

    icl_blocker.into_iter().chain(model_blocker).collect()
}
/// Renders a voice as `preset:<name>` or `profile:<name>` for log and error messages.
fn describe_voice(voice: &VoiceSelection) -> String {
    let (kind, name) = match voice {
        VoiceSelection::Profile { name } => ("profile", name),
        VoiceSelection::Preset { name } => ("preset", name),
    };
    format!("{kind}:{name}")
}
/// Parses the optional `--model` argument into a model variant.
///
/// # Errors
/// Fails when a value is present but is not a recognized model variant.
fn parse_model_arg(value: Option<&str>) -> Result<Option<ModelVariant>> {
    let parsed = value
        .map(|raw| raw.parse::<ModelVariant>())
        .transpose()?;
    Ok(parsed)
}
/// Locates the daemon executable.
///
/// Prefers a `speakers-daemon` file sitting next to the current executable; when that
/// does not exist (or the current executable cannot be determined) the bare name
/// `speakers-daemon` is returned.
fn daemon_binary_path() -> PathBuf {
    std::env::current_exe()
        .ok()
        .map(|exe| exe.with_file_name("speakers-daemon"))
        .filter(|sibling| sibling.exists())
        .unwrap_or_else(|| PathBuf::from("speakers-daemon"))
}
/// Probes the daemon with a health request and reports whether it answered with `ok`.
///
/// # Errors
/// Fails when the request cannot be sent or the reply cannot be read.
fn daemon_is_alive(config: &Config) -> Result<bool> {
    send_request(config, &DaemonRequest::Health).map(|reply| reply.ok)
}
/// Returns the text given on the command line, or reads all of stdin when it is absent.
///
/// # Errors
/// Fails when stdin cannot be read as UTF-8 text.
fn read_text(arg: Option<String>) -> Result<String> {
    if let Some(text) = arg {
        return Ok(text);
    }
    let mut piped = String::new();
    std::io::stdin()
        .read_to_string(&mut piped)
        .context("failed to read text from stdin")?;
    Ok(piped)
}
/// Picks the compute device for local (non-daemon) work such as voice cloning.
///
/// Uses whatever `auto_device` selects, warning on stderr when that is the CPU, and
/// falls back to the CPU with a warning when device initialization fails.
fn choose_device_for_local_tasks() -> Device {
    let device = match auto_device() {
        Ok(device) => device,
        Err(err) => {
            eprintln!("warning: failed to initialize accelerated device ({err}); using CPU");
            return Device::Cpu;
        }
    };
    if matches!(device, Device::Cpu) {
        eprintln!("warning: CUDA not selected for clone task; using CPU");
    }
    device
}
/// Chooses the voice for `speak-selection`.
///
/// An explicit `--voice` selects a preset and an explicit `--profile` selects a profile;
/// with neither, the configured Speech Dispatcher default mapping is used.
///
/// # Errors
/// Fails when both options are given or when the configured default cannot be resolved.
fn resolve_selection_voice(
    config: &Config,
    voice: Option<String>,
    profile_name: Option<String>,
) -> Result<VoiceSelection> {
    if voice.is_some() && profile_name.is_some() {
        anyhow::bail!("--voice and --profile are mutually exclusive");
    }
    if let Some(preset) = voice {
        return Ok(VoiceSelection::preset(preset));
    }
    if let Some(profile) = profile_name {
        return Ok(VoiceSelection::profile(profile));
    }
    config.speech_dispatcher.resolve_voice_selection(None)
}
/// Wraps the requested output path, or allocates a temporary wav file when none is given.
///
/// # Errors
/// Fails when the temporary file cannot be created or flushed.
fn prepare_synth_output(output: Option<PathBuf>) -> Result<SynthOutput> {
    if let Some(path) = output {
        return Ok(SynthOutput::Explicit(path));
    }
    let mut scratch = NamedTempFile::new_in(std::env::temp_dir())
        .context("failed to allocate temp wav file")?;
    scratch
        .as_file_mut()
        .flush()
        .context("failed to initialize temp wav file")?;
    Ok(SynthOutput::Temporary(scratch))
}
fn resolve_playback_command(cli: Option<&str>, config: Option<&str>) -> Result<String> {
let picked = cli
.map(str::trim)
.filter(|s| !s.is_empty())
.or_else(|| config.map(str::trim).filter(|s| !s.is_empty()))
.map(ToOwned::to_owned)
.or_else(default_playback_command);
picked.ok_or_else(|| {
anyhow::anyhow!(
"no playback command found (set speech_dispatcher.playback_command or pass --playback-command)"
)
})
}
/// Returns the first of `pw-play`, `paplay`, `aplay` that is found on `PATH`, if any.
fn default_playback_command() -> Option<String> {
    ["pw-play", "paplay", "aplay"]
        .iter()
        .copied()
        .find(|player| command_in_path(player))
        .map(String::from)
}
fn run_playback(command_line: &str, wav_path: &Path) -> Result<()> {
let parts = shlex::split(command_line)
.ok_or_else(|| anyhow::anyhow!("failed to parse playback command: {command_line}"))?;
anyhow::ensure!(!parts.is_empty(), "playback command is empty");
let mut cmd = Command::new(&parts[0]);
if parts.len() > 1 {
cmd.args(&parts[1..]);
}
cmd.arg(wav_path);
let status = cmd
.status()
.with_context(|| format!("failed to execute playback command: {command_line}"))?;
anyhow::ensure!(
status.success(),
"playback command exited with status {status}: {command_line}"
);
Ok(())
}
/// Reports whether `command` names an executable file in one of the `PATH` directories.
///
/// A candidate only counts when it resolves (following symlinks) to a regular file with
/// at least one execute permission bit set. Directories and non-executable files that
/// happen to share the name are ignored, so a positive answer means the command can
/// plausibly be spawned. Returns `false` when `PATH` is unset.
fn command_in_path(command: &str) -> bool {
    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };
    std::env::split_paths(&path_var).any(|dir| {
        std::fs::metadata(dir.join(command))
            .map(|meta| {
                // Any of the user/group/other execute bits.
                let mode = std::os::unix::fs::PermissionsExt::mode(&meta.permissions());
                meta.is_file() && mode & 0o111 != 0
            })
            .unwrap_or(false)
    })
}
/// Captures the current selection text using the real environment and helper programs.
///
/// # Errors
/// Fails when no desktop session is detected, no helper is installed, or every capture
/// attempt yields no text.
fn read_selected_text(source: SelectionSource) -> Result<String> {
    read_selected_text_with(
        current_selection_backend()?,
        source,
        command_in_path,
        execute_selection_command,
    )
}
/// Detects the selection backend from this process's `WAYLAND_DISPLAY` / `DISPLAY`.
///
/// # Errors
/// Fails when neither variable is set to a non-empty value.
fn current_selection_backend() -> Result<SelectionBackend> {
    let wayland = std::env::var_os("WAYLAND_DISPLAY");
    let x11 = std::env::var_os("DISPLAY");
    detect_selection_backend(wayland.as_deref(), x11.as_deref())
}
/// Chooses the selection backend from the two display variables; Wayland wins when both
/// are set to non-empty values.
///
/// # Errors
/// Fails when both values are missing or empty.
fn detect_selection_backend(
    wayland_display: Option<&OsStr>,
    x11_display: Option<&OsStr>,
) -> Result<SelectionBackend> {
    match (has_env_value(wayland_display), has_env_value(x11_display)) {
        (true, _) => Ok(SelectionBackend::Wayland),
        (false, true) => Ok(SelectionBackend::X11),
        (false, false) => anyhow::bail!(
            "unsupported desktop session: neither WAYLAND_DISPLAY nor DISPLAY is set"
        ),
    }
}
/// True when the environment value is present and not the empty string.
fn has_env_value(value: Option<&OsStr>) -> bool {
    match value {
        Some(item) => !item.is_empty(),
        None => false,
    }
}
/// Core of selection capture, with the environment probes injected for testability.
///
/// Runs each installed helper command in order and returns the first non-empty (trimmed)
/// text. Helpers that fail are remembered and listed in the final error.
///
/// * `has_command` reports whether a helper program is installed.
/// * `read_command` executes one helper and returns its raw output.
///
/// # Errors
/// Fails when no helper is installed, or when every helper failed or produced only
/// whitespace.
fn read_selected_text_with<F, G>(
    backend: SelectionBackend,
    source: SelectionSource,
    has_command: F,
    mut read_command: G,
) -> Result<String>
where
    F: Fn(&str) -> bool,
    G: FnMut(&SelectionCommand) -> Result<String>,
{
    let mut failures: Vec<String> = Vec::new();
    for command in installed_selection_commands(backend, source, has_command)? {
        let captured = match read_command(&command) {
            Ok(text) => text,
            Err(err) => {
                failures.push(format!(
                    "{} via {} failed: {err}",
                    command.target.description(),
                    command.program
                ));
                continue;
            }
        };
        // An empty capture is not an error; just move on to the next helper.
        let captured = captured.trim();
        if !captured.is_empty() {
            return Ok(captured.to_string());
        }
    }

    let source_description = describe_selection_source(source);
    if failures.is_empty() {
        anyhow::bail!("{source_description} resolved to empty text");
    }
    anyhow::bail!(
        "{source_description} resolved to empty text; capture attempts: {}",
        failures.join("; ")
    )
}
/// Returns the candidate helper commands for `backend`/`source` whose program is
/// installed, preserving their preference order.
///
/// # Errors
/// Fails with an installation hint for the backend when none of the helpers is installed.
fn installed_selection_commands<F>(
    backend: SelectionBackend,
    source: SelectionSource,
    has_command: F,
) -> Result<Vec<SelectionCommand>>
where
    F: Fn(&str) -> bool,
{
    let available: Vec<SelectionCommand> = selection_commands(backend, source)
        .into_iter()
        .filter(|candidate| has_command(candidate.program))
        .collect();
    if available.is_empty() {
        let hint = match backend {
            SelectionBackend::Wayland => {
                "no supported Wayland selection helper found; install wl-clipboard"
            }
            SelectionBackend::X11 => {
                "no supported X11 selection helper found; install xclip or xsel"
            }
        };
        anyhow::bail!("{hint}");
    }
    Ok(available)
}
fn selection_commands(backend: SelectionBackend, source: SelectionSource) -> Vec<SelectionCommand> {
match (backend, source) {
(SelectionBackend::Wayland, SelectionSource::Auto) => vec![
SelectionCommand::new(
"wl-paste",
&["--no-newline", "--primary"],
SelectionTarget::Primary,
),
SelectionCommand::new("wl-paste", &["--no-newline"], SelectionTarget::Clipboard),
],
(SelectionBackend::Wayland, SelectionSource::Primary) => vec![SelectionCommand::new(
"wl-paste",
&["--no-newline", "--primary"],
SelectionTarget::Primary,
)],
(SelectionBackend::Wayland, SelectionSource::Clipboard) => vec![SelectionCommand::new(
"wl-paste",
&["--no-newline"],
SelectionTarget::Clipboard,
)],
(SelectionBackend::X11, SelectionSource::Auto) => vec![
SelectionCommand::new(
"xclip",
&["-selection", "primary", "-o"],
SelectionTarget::Primary,
),
SelectionCommand::new("xsel", &["--primary", "--output"], SelectionTarget::Primary),
SelectionCommand::new(
"xclip",
&["-selection", "clipboard", "-o"],
SelectionTarget::Clipboard,
),
SelectionCommand::new(
"xsel",
&["--clipboard", "--output"],
SelectionTarget::Clipboard,
),
],
(SelectionBackend::X11, SelectionSource::Primary) => vec![
SelectionCommand::new(
"xclip",
&["-selection", "primary", "-o"],
SelectionTarget::Primary,
),
SelectionCommand::new("xsel", &["--primary", "--output"], SelectionTarget::Primary),
],
(SelectionBackend::X11, SelectionSource::Clipboard) => vec![
SelectionCommand::new(
"xclip",
&["-selection", "clipboard", "-o"],
SelectionTarget::Clipboard,
),
SelectionCommand::new(
"xsel",
&["--clipboard", "--output"],
SelectionTarget::Clipboard,
),
],
}
}
/// Runs one selection helper and returns its stdout (lossily decoded as UTF-8).
///
/// # Errors
/// Fails when the program cannot be executed or exits unsuccessfully; the error includes
/// the exit status and, when present, the trimmed stderr output.
fn execute_selection_command(command: &SelectionCommand) -> Result<String> {
    let output = Command::new(command.program)
        .args(command.args)
        .output()
        .with_context(|| format!("failed to execute {}", command.program))?;

    if output.status.success() {
        return Ok(String::from_utf8_lossy(&output.stdout).into_owned());
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    let stderr = stderr.trim();
    if stderr.is_empty() {
        anyhow::bail!("exit status {}", output.status);
    }
    anyhow::bail!("exit status {}: {stderr}", output.status)
}
/// Human-readable name of a selection source, used at the start of capture error messages.
fn describe_selection_source(source: SelectionSource) -> &'static str {
    let label = match source {
        SelectionSource::Clipboard => "clipboard",
        SelectionSource::Primary => "primary selection",
        SelectionSource::Auto => "selection and clipboard",
    };
    label
}
/// Trims the input and removes markup tags (`<...>`), returning the remaining text.
///
/// Input without both `<` and `>` is returned trimmed and otherwise untouched. A tag is
/// everything from a `<` up to and including the next `>`. A `<` that is never closed is
/// kept as literal text (together with whatever follows it) instead of swallowing the
/// rest of the message, so plain text such as `a < b` survives.
fn sanitize_spd_text(input: &str) -> String {
    let trimmed = input.trim();
    if !trimmed.contains('<') || !trimmed.contains('>') {
        return trimmed.to_string();
    }

    let mut out = String::with_capacity(trimmed.len());
    let mut rest = trimmed;
    while let Some(open) = rest.find('<') {
        out.push_str(&rest[..open]);
        match rest[open..].find('>') {
            // Skip the whole tag, including its closing '>'.
            Some(close) => rest = &rest[open + close + 1..],
            // Unterminated '<': not a tag, keep the remainder verbatim.
            None => {
                out.push_str(&rest[open..]);
                rest = "";
                break;
            }
        }
    }
    out.push_str(rest);
    out.trim().to_string()
}
/// Resolves the language to synthesize with.
///
/// Returns `default` when the input is missing, blank, or one of the locale placeholders
/// `C` / `POSIX` (case-insensitive); otherwise returns the trimmed input.
fn normalize_spd_language(input: Option<&str>, default: &str) -> String {
    let requested = input.map(str::trim).unwrap_or("");
    let is_placeholder = requested.is_empty()
        || requested.eq_ignore_ascii_case("c")
        || requested.eq_ignore_ascii_case("posix");
    if is_placeholder {
        default.to_string()
    } else {
        requested.to_string()
    }
}
fn parse_spd_scalar(raw: Option<&str>, field: &str) -> Result<Option<i32>> {
let Some(raw) = raw.map(str::trim).filter(|s| !s.is_empty()) else {
return Ok(None);
};
if let Ok(value) = raw.parse::<i32>() {
return Ok(Some(value.clamp(-100, 100)));
}
let value = raw
.parse::<f32>()
.with_context(|| format!("invalid {field} value '{raw}'"))?;
Ok(Some(value.round() as i32).map(|v| v.clamp(-100, 100)))
}
// Unit tests for the CLI helpers: SPD text/language/scalar normalisation,
// ICL and daemon-model guards, selection capture planning, and argument parsing.
#[cfg(test)]
mod tests {
    use super::{
        detect_selection_backend, enforce_daemon_model_for_voice, enforce_icl_opt_in_for_profile,
        icl_readiness_blockers, installed_selection_commands, normalize_spd_language,
        parse_spd_scalar, read_selected_text_with, sanitize_spd_text, selection_commands, Cli,
        SelectionBackend, SelectionCommand, SelectionSource, SelectionTarget, TopCommand,
    };
    use clap::Parser;
    use speakers_core::model::ModelVariant;
    use speakers_core::profile::ProfileMode;
    use speakers_core::protocol::VoiceSelection;
    use std::ffi::OsStr;

    // --- SPD input normalisation -------------------------------------------

    #[test]
    fn strips_ssml_tags_from_spd_input() {
        let input = "<speak>module activity test</speak>";
        assert_eq!(sanitize_spd_text(input), "module activity test");
    }

    #[test]
    fn leaves_plain_text_unchanged() {
        assert_eq!(sanitize_spd_text("hello world"), "hello world");
    }

    #[test]
    fn normalizes_placeholder_locales_to_default_language() {
        // Missing, blank, and "C"/"POSIX" values all fall back to the default.
        assert_eq!(normalize_spd_language(None, "en"), "en");
        assert_eq!(normalize_spd_language(Some("   "), "en"), "en");
        assert_eq!(normalize_spd_language(Some("C"), "en"), "en");
        assert_eq!(normalize_spd_language(Some("posix"), "en"), "en");
    }

    #[test]
    fn keeps_explicit_spd_language_after_trimming() {
        assert_eq!(normalize_spd_language(Some(" de-DE "), "en"), "de-DE");
    }

    #[test]
    fn spd_scalar_is_absent_for_missing_or_blank_input() {
        assert_eq!(
            parse_spd_scalar(None, "rate").expect("missing value should be accepted"),
            None
        );
        assert_eq!(
            parse_spd_scalar(Some("  "), "rate").expect("blank value should be accepted"),
            None
        );
    }

    #[test]
    fn spd_scalar_is_rounded_and_clamped() {
        assert_eq!(
            parse_spd_scalar(Some(" 7 "), "rate").expect("integer should parse"),
            Some(7)
        );
        assert_eq!(
            parse_spd_scalar(Some("12.6"), "rate").expect("float should parse"),
            Some(13)
        );
        assert_eq!(
            parse_spd_scalar(Some("150"), "pitch").expect("large value should parse"),
            Some(100)
        );
        assert_eq!(
            parse_spd_scalar(Some("-250"), "pitch").expect("small value should parse"),
            Some(-100)
        );
    }

    #[test]
    fn spd_scalar_rejects_non_numeric_input() {
        let err = parse_spd_scalar(Some("fast"), "rate")
            .expect_err("non-numeric value should be rejected");
        assert!(err.to_string().contains("invalid rate value 'fast'"));
    }

    // --- ICL opt-in and daemon model guards ---------------------------------

    #[test]
    fn rejects_icl_profile_when_allow_icl_is_false() {
        let err = enforce_icl_opt_in_for_profile("sample_voice", ProfileMode::Icl, false)
            .expect_err("ICL profile should be blocked");
        let message = err.to_string();
        assert!(message.contains("icl_not_allowed"));
        assert!(message.contains("allow_icl=false"));
    }

    #[test]
    fn rejects_profile_voice_when_daemon_model_is_not_base() {
        let err = enforce_daemon_model_for_voice(
            &VoiceSelection::profile("sample_voice"),
            ModelVariant::CustomVoice,
        )
        .expect_err("profile voices should require base model");
        let message = err.to_string();
        // The error must say what is required, what is running, and how to fix it.
        assert!(message.contains("requires daemon model base"));
        assert!(message.contains("running custom-voice"));
        assert!(message.contains("speakers daemon start --model base --foreground"));
    }

    #[test]
    fn allows_preset_voice_when_daemon_model_is_custom_voice() {
        enforce_daemon_model_for_voice(&VoiceSelection::preset("ryan"), ModelVariant::CustomVoice)
            .expect("preset voices should be valid on custom-voice daemon");
    }

    #[test]
    fn readiness_is_blocked_when_icl_profiles_are_mapped_with_opt_in_disabled() {
        let blockers = icl_readiness_blockers(false, true, true, Some(ModelVariant::Base));
        assert!(blockers.iter().any(|line| line.contains("allow_icl=false")));
    }

    #[test]
    fn readiness_is_blocked_when_profile_mapping_uses_non_base_daemon() {
        let blockers = icl_readiness_blockers(true, true, false, Some(ModelVariant::CustomVoice));
        assert!(blockers
            .iter()
            .any(|line| line.contains("required model is base")));
    }

    // --- Selection capture planning -----------------------------------------

    #[test]
    fn wayland_auto_checks_primary_before_clipboard() {
        let commands = selection_commands(SelectionBackend::Wayland, SelectionSource::Auto);
        assert_eq!(
            commands,
            vec![
                SelectionCommand::new(
                    "wl-paste",
                    &["--no-newline", "--primary"],
                    SelectionTarget::Primary,
                ),
                SelectionCommand::new("wl-paste", &["--no-newline"], SelectionTarget::Clipboard,),
            ]
        );
    }

    #[test]
    fn x11_auto_checks_primary_then_clipboard_across_helpers() {
        let commands = selection_commands(SelectionBackend::X11, SelectionSource::Auto);
        assert_eq!(
            commands,
            vec![
                SelectionCommand::new(
                    "xclip",
                    &["-selection", "primary", "-o"],
                    SelectionTarget::Primary,
                ),
                SelectionCommand::new("xsel", &["--primary", "--output"], SelectionTarget::Primary,),
                SelectionCommand::new(
                    "xclip",
                    &["-selection", "clipboard", "-o"],
                    SelectionTarget::Clipboard,
                ),
                SelectionCommand::new(
                    "xsel",
                    &["--clipboard", "--output"],
                    SelectionTarget::Clipboard,
                ),
            ]
        );
    }

    #[test]
    fn x11_uses_available_helper_order_for_clipboard_only() {
        // Only `xsel` is reported as installed, so only its command should remain.
        let commands = installed_selection_commands(
            SelectionBackend::X11,
            SelectionSource::Clipboard,
            |program| program == "xsel",
        )
        .expect("xsel should satisfy the X11 helper requirement");
        assert_eq!(
            commands,
            vec![SelectionCommand::new(
                "xsel",
                &["--clipboard", "--output"],
                SelectionTarget::Clipboard,
            )]
        );
    }

    #[test]
    fn rejects_selection_capture_without_session_env() {
        let err = detect_selection_backend(None, None).expect_err("session detection should fail");
        assert!(err.to_string().contains("unsupported desktop session"));
    }

    #[test]
    fn rejects_wayland_capture_when_helper_is_missing() {
        let err =
            installed_selection_commands(SelectionBackend::Wayland, SelectionSource::Auto, |_| {
                false
            })
            .expect_err("wl-paste should be required on Wayland");
        assert!(err.to_string().contains("wl-clipboard"));
    }

    #[test]
    fn auto_selection_falls_back_to_clipboard_after_empty_primary() {
        // The primary selection yields only whitespace; the clipboard has real text.
        let text = read_selected_text_with(
            SelectionBackend::Wayland,
            SelectionSource::Auto,
            |_| true,
            |command| match command.target {
                SelectionTarget::Primary => Ok(" ".to_string()),
                SelectionTarget::Clipboard => Ok("clipboard text".to_string()),
            },
        )
        .expect("clipboard fallback should succeed");
        assert_eq!(text, "clipboard text");
    }

    #[test]
    fn errors_when_selection_sources_are_empty() {
        let err = read_selected_text_with(
            SelectionBackend::Wayland,
            SelectionSource::Auto,
            |_| true,
            |_| Ok(" ".to_string()),
        )
        .expect_err("empty capture should fail");
        assert!(err
            .to_string()
            .contains("selection and clipboard resolved to empty text"));
    }

    #[test]
    fn session_detection_prefers_wayland_when_both_displays_are_set() {
        let backend =
            detect_selection_backend(Some(OsStr::new("wayland-1")), Some(OsStr::new(":0")))
                .expect("Wayland should win when both session variables are set");
        assert_eq!(backend, SelectionBackend::Wayland);
    }

    // --- CLI argument parsing -----------------------------------------------

    #[test]
    fn speak_selection_defaults_to_auto_source() {
        let cli = Cli::try_parse_from(["speakers", "speak-selection"])
            .expect("speak-selection should parse");
        match cli.command {
            TopCommand::SpeakSelection { source, .. } => {
                assert_eq!(source, SelectionSource::Auto)
            }
            _ => panic!("unexpected command parsed"),
        }
    }

    #[test]
    fn speak_selection_rejects_voice_and_profile_together() {
        let err = Cli::try_parse_from([
            "speakers",
            "speak-selection",
            "--voice",
            "ryan",
            "--profile",
            "sample_voice",
        ])
        .expect_err("voice/profile conflict should fail");
        assert!(err.to_string().contains("--voice"));
        assert!(err.to_string().contains("--profile"));
    }
}