use serde::{Deserialize, Serialize};
/// Returns the audio format used when a request does not specify one: `"wav"`.
///
/// Serves both as the serde default for `SynthesizeRequest::format` and as the
/// value used by `SynthesizeRequest::default()`, so the two stay in agreement.
fn default_audio_format() -> String {
    String::from("wav")
}
/// Canonical (non-alias) names of the Qwen3-TTS control fields on
/// `SynthesizeRequest`.
///
/// These are the same three names, in the same order, that
/// `SynthesizeRequest::requested_advanced_controls` can report.
pub const QWEN3_TTS_CONTROL_FIELDS: &[&str] =
    &["reference_audio_path", "reference_text", "voice_instruction"];
/// Request payload for a speech-synthesis call.
///
/// When deserializing, only `text` is mandatory: every other field carries a
/// serde default, so a payload containing just `text` is accepted.
///
/// The three Qwen3-TTS control fields (`reference_audio_path`,
/// `reference_text`, `voice_instruction`) and `strict_capabilities` are left
/// out of the serialized output while they are unset / `false`. The remaining
/// fields are always serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesizeRequest {
/// Text to synthesize. The only field without a serde default, so it must
/// be present in the input.
pub text: String,
/// Optional model identifier (the tests use
/// `"Qwen3-TTS-12Hz-1.7B-Base-5bit"`). `None` when absent from the input.
#[serde(default)]
pub model: Option<String>,
/// Optional voice name (the tests use `"Chelsie"`). `None` when absent.
#[serde(default)]
pub voice: Option<String>,
/// Optional language tag (the tests use `"en"`). `None` when absent.
#[serde(default)]
pub language: Option<String>,
/// Optional speed setting. `None` when absent.
/// NOTE(review): units and valid range are not visible in this file —
/// presumably a rate multiplier; confirm against the consumer.
#[serde(default)]
pub speed: Option<f32>,
/// Optional output path. `None` when absent.
/// NOTE(review): presumably where the generated audio should be written;
/// confirm against the consumer.
#[serde(default)]
pub output_path: Option<String>,
/// Audio format name. Falls back to `default_audio_format()` (`"wav"`)
/// when the key is missing from the input.
#[serde(default = "default_audio_format")]
pub format: String,
/// Qwen3-TTS control: path to a reference audio file. Also accepted under
/// the key `ref_audio`; omitted from serialized output when `None`.
#[serde(default, alias = "ref_audio", skip_serializing_if = "Option::is_none")]
pub reference_audio_path: Option<String>,
/// Qwen3-TTS control: reference text. Also accepted under the key
/// `ref_text`; omitted from serialized output when `None`.
/// NOTE(review): presumably the transcript of the reference audio — confirm.
#[serde(default, alias = "ref_text", skip_serializing_if = "Option::is_none")]
pub reference_text: Option<String>,
/// Qwen3-TTS control: free-form voice instruction (the tests use e.g.
/// `"a warm male voice, medium pace"`). Also accepted under the key
/// `instruct`; omitted from serialized output when `None`.
#[serde(default, alias = "instruct", skip_serializing_if = "Option::is_none")]
pub voice_instruction: Option<String>,
/// Defaults to `false`. `std::ops::Not::not` yields `true` for a `false`
/// value, so the key is skipped on serialization unless the flag is set.
/// NOTE(review): the effect of enabling this is decided by the consumer
/// and is not visible in this file.
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub strict_capabilities: bool,
}
impl Default for SynthesizeRequest {
fn default() -> Self {
Self {
text: String::new(),
model: None,
voice: None,
language: None,
speed: None,
output_path: None,
format: default_audio_format(),
reference_audio_path: None,
reference_text: None,
voice_instruction: None,
strict_capabilities: false,
}
}
}
impl SynthesizeRequest {
    /// Lists the advanced control fields that are set on this request.
    ///
    /// A field counts as set when it is `Some(_)`, whatever the contained
    /// string is. The returned names are the canonical field names, always in
    /// the order `reference_audio_path`, `reference_text`,
    /// `voice_instruction`. The result is empty when none of them is set.
    pub fn requested_advanced_controls(&self) -> Vec<&'static str> {
        // (field name, is it set?) in reporting order.
        let candidates: [(&'static str, bool); 3] = [
            ("reference_audio_path", self.reference_audio_path.is_some()),
            ("reference_text", self.reference_text.is_some()),
            ("voice_instruction", self.voice_instruction.is_some()),
        ];

        candidates
            .iter()
            .filter(|(_, is_set)| *is_set)
            .map(|(name, _)| *name)
            .collect()
    }
}
/// Result payload describing the outcome of a synthesis call.
///
/// `audio_path` and `media_type` are required when deserializing; the two
/// `*_used` fields are optional and are left out of the serialized output
/// when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesizeResult {
/// Path of the audio.
/// NOTE(review): presumably the file the synthesized audio was written to;
/// confirm against the producer.
pub audio_path: String,
/// Media type string for the audio.
/// NOTE(review): presumably a MIME type such as `audio/wav` — the exact
/// values are not visible in this file.
pub media_type: String,
/// Model reported for this result, if any. Defaults to `None` when the key
/// is missing and is skipped on serialization when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_used: Option<String>,
/// Voice reported for this result, if any. Defaults to `None` when the key
/// is missing and is skipped on serialization when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub voice_used: Option<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a JSON document into a request, panicking if it is rejected.
    fn parse(json: &str) -> SynthesizeRequest {
        serde_json::from_str(json).unwrap()
    }

    /// Encodes a request as a JSON string, panicking if serialization fails.
    fn encode(req: &SynthesizeRequest) -> String {
        serde_json::to_string(req).unwrap()
    }

    /// Payloads that predate the Qwen3 fields must keep parsing, with the new
    /// fields falling back to their defaults.
    #[test]
    fn legacy_request_payload_still_deserializes() {
        let req = parse(r#"{"text":"hello","voice":"Chelsie"}"#);

        assert_eq!(req.text, "hello");
        assert_eq!(req.voice.as_deref(), Some("Chelsie"));

        let qwen3_fields = [
            &req.reference_audio_path,
            &req.reference_text,
            &req.voice_instruction,
        ];
        for field in qwen3_fields.iter() {
            assert!(field.is_none());
        }
        assert!(!req.strict_capabilities);
    }

    /// Unset Qwen3 fields (and a false strict flag) must not appear in the
    /// serialized JSON at all.
    #[test]
    fn unset_qwen3_fields_are_omitted_from_output_json() {
        let mut req = SynthesizeRequest::default();
        req.text = "hi".into();

        let json = encode(&req);

        let absent_keys = [
            "reference_audio_path",
            "reference_text",
            "voice_instruction",
            "strict_capabilities",
        ];
        for key in absent_keys.iter() {
            assert!(!json.contains(key));
        }
    }

    /// Populated Qwen3 fields survive a serialize → deserialize round trip.
    #[test]
    fn set_qwen3_fields_roundtrip() {
        let original = SynthesizeRequest {
            text: "hi".into(),
            model: Some("Qwen3-TTS-12Hz-1.7B-Base-5bit".into()),
            language: Some("en".into()),
            reference_audio_path: Some("/tmp/ref.wav".into()),
            reference_text: Some("the reference sentence".into()),
            voice_instruction: Some("a warm male voice, medium pace".into()),
            strict_capabilities: true,
            ..SynthesizeRequest::default()
        };

        let parsed = parse(&encode(&original));

        assert_eq!(parsed.reference_audio_path.as_deref(), Some("/tmp/ref.wav"));
        assert_eq!(
            parsed.reference_text.as_deref(),
            Some("the reference sentence")
        );
        assert_eq!(
            parsed.voice_instruction.as_deref(),
            Some("a warm male voice, medium pace")
        );
        assert!(parsed.strict_capabilities);
    }

    /// The short upstream key names are accepted as aliases for the long
    /// field names.
    #[test]
    fn upstream_qwen_terse_keys_are_accepted_as_aliases() {
        // The raw literal is kept exactly as written, including line breaks.
        let req = parse(
            r#"{
"text": "hi",
"ref_audio": "/tmp/ref.wav",
"ref_text": "sentence",
"instruct": "warm female voice"
}"#,
        );

        assert_eq!(req.reference_audio_path.as_deref(), Some("/tmp/ref.wav"));
        assert_eq!(req.reference_text.as_deref(), Some("sentence"));
        assert_eq!(req.voice_instruction.as_deref(), Some("warm female voice"));
    }

    /// Only the controls that are actually set are reported.
    #[test]
    fn requested_advanced_controls_reports_only_set_fields() {
        let req = SynthesizeRequest {
            text: "hi".into(),
            reference_audio_path: Some("/tmp/ref.wav".into()),
            voice_instruction: Some("warm".into()),
            ..SynthesizeRequest::default()
        };

        let reported = req.requested_advanced_controls();
        let has = |wanted: &str| reported.iter().any(|name| *name == wanted);

        assert!(has("reference_audio_path"));
        assert!(!has("reference_text"));
        assert!(has("voice_instruction"));
        assert_eq!(reported.len(), 2);
    }

    /// A default request asks for no advanced controls.
    #[test]
    fn empty_request_reports_no_advanced_controls() {
        let reported = SynthesizeRequest::default().requested_advanced_controls();
        assert!(reported.is_empty());
    }
}