1use crate::error::{CliError, Result};
4use once_cell::sync::Lazy;
5use regex::Regex;
6use std::collections::HashMap;
7
8static RE_SPEAK: Lazy<Regex> =
10 Lazy::new(|| Regex::new(r"<speak[^>]*>.*</speak>").expect("Invalid SPEAK regex pattern"));
11static RE_VOICE: Lazy<Regex> =
12 Lazy::new(|| Regex::new(r"<voice[^>]*>.*</voice>").expect("Invalid VOICE regex pattern"));
13static RE_PROSODY: Lazy<Regex> =
14 Lazy::new(|| Regex::new(r"<prosody[^>]*>.*</prosody>").expect("Invalid PROSODY regex pattern"));
15static RE_BREAK: Lazy<Regex> =
16 Lazy::new(|| Regex::new(r"<break[^/>]*/>").expect("Invalid BREAK regex pattern"));
17static RE_EMPHASIS: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(r"<emphasis[^>]*>.*</emphasis>").expect("Invalid EMPHASIS regex pattern")
19});
20static RE_SAY_AS: Lazy<Regex> =
21 Lazy::new(|| Regex::new(r"<say-as[^>]*>.*</say-as>").expect("Invalid SAY-AS regex pattern"));
22static RE_PHONEME: Lazy<Regex> =
23 Lazy::new(|| Regex::new(r"<phoneme[^>]*>.*</phoneme>").expect("Invalid PHONEME regex pattern"));
24static RE_SUB: Lazy<Regex> =
25 Lazy::new(|| Regex::new(r"<sub[^>]*>.*</sub>").expect("Invalid SUB regex pattern"));
26static RE_TAG: Lazy<Regex> =
27 Lazy::new(|| Regex::new(r"<(/?)(\w+)(?:[^>]*)>").expect("Invalid TAG regex pattern"));
28static RE_TAG_REMOVE: Lazy<Regex> =
29 Lazy::new(|| Regex::new(r"<[^>]*>").expect("Invalid TAG_REMOVE regex pattern"));
30static RE_WHITESPACE: Lazy<Regex> =
31 Lazy::new(|| Regex::new(r"\s+").expect("Invalid WHITESPACE regex pattern"));
32static RE_PROSODY_TAG: Lazy<Regex> =
33 Lazy::new(|| Regex::new(r#"<prosody\s+([^>]+)>"#).expect("Invalid PROSODY_TAG regex pattern"));
34static RE_RATE: Lazy<Regex> =
35 Lazy::new(|| Regex::new(r#"rate\s*=\s*["']([^"']+)["']"#).expect("Invalid RATE regex pattern"));
36static RE_PITCH: Lazy<Regex> = Lazy::new(|| {
37 Regex::new(r#"pitch\s*=\s*["']([^"']+)["']"#).expect("Invalid PITCH regex pattern")
38});
39static RE_VOLUME: Lazy<Regex> = Lazy::new(|| {
40 Regex::new(r#"volume\s*=\s*["']([^"']+)["']"#).expect("Invalid VOLUME regex pattern")
41});
42static RE_VOICE_NAME: Lazy<Regex> = Lazy::new(|| {
43 Regex::new(r#"<voice\s+name\s*=\s*["']([^"']+)["']"#).expect("Invalid VOICE_NAME regex pattern")
44});
45
46pub struct SsmlProcessor {
48 patterns: HashMap<String, &'static Lazy<Regex>>,
50}
51
52impl Default for SsmlProcessor {
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl SsmlProcessor {
59 pub fn new() -> Self {
61 let mut patterns = HashMap::new();
62
63 patterns.insert("speak".to_string(), &RE_SPEAK);
65 patterns.insert("voice".to_string(), &RE_VOICE);
66 patterns.insert("prosody".to_string(), &RE_PROSODY);
67 patterns.insert("break".to_string(), &RE_BREAK);
68 patterns.insert("emphasis".to_string(), &RE_EMPHASIS);
69 patterns.insert("say-as".to_string(), &RE_SAY_AS);
70 patterns.insert("phoneme".to_string(), &RE_PHONEME);
71 patterns.insert("sub".to_string(), &RE_SUB);
72
73 Self { patterns }
74 }
75
76 pub fn is_ssml(&self, text: &str) -> bool {
78 text.trim_start().starts_with('<') && text.contains("</")
79 }
80
81 pub fn validate(&self, ssml: &str) -> Result<Vec<SsmlValidationIssue>> {
83 let mut issues = Vec::new();
84
85 if !ssml.trim().starts_with("<speak") {
87 issues.push(SsmlValidationIssue {
88 issue_type: SsmlIssueType::Error,
89 message: "SSML must start with <speak> tag".to_string(),
90 line: 1,
91 column: 1,
92 suggestion: Some("Wrap your content in <speak>...</speak> tags".to_string()),
93 });
94 }
95
96 if !ssml.trim().ends_with("</speak>") {
97 let line_count = ssml.lines().count();
98 let last_line_len = ssml.lines().last().map(|l| l.len()).unwrap_or(0);
99
100 issues.push(SsmlValidationIssue {
101 issue_type: SsmlIssueType::Error,
102 message: "SSML must end with </speak> tag".to_string(),
103 line: line_count,
104 column: last_line_len,
105 suggestion: Some("Add closing </speak> tag".to_string()),
106 });
107 }
108
109 issues.extend(self.validate_tag_balance(ssml)?);
111
112 issues.extend(self.validate_attributes(ssml)?);
114
115 Ok(issues)
116 }
117
118 fn validate_tag_balance(&self, ssml: &str) -> Result<Vec<SsmlValidationIssue>> {
120 let mut issues = Vec::new();
121 let mut tag_stack = Vec::new();
122
123 for (line_num, line) in ssml.lines().enumerate() {
124 for cap in RE_TAG.captures_iter(line) {
125 let is_closing = !cap[1].is_empty();
126 let tag_name = &cap[2];
127
128 if line.contains(&format!("<{}", tag_name)) && line.contains("/>") {
130 continue;
131 }
132
133 if is_closing {
134 if let Some(last_tag) = tag_stack.pop() {
135 if last_tag != tag_name {
136 issues.push(SsmlValidationIssue {
137 issue_type: SsmlIssueType::Error,
138 message: format!(
139 "Mismatched closing tag: expected </{}>, found </{}>",
140 last_tag, tag_name
141 ),
142 line: line_num + 1,
143 column: line.find(&cap[0]).unwrap_or(0) + 1,
144 suggestion: Some(format!("Change to </{}>", last_tag)),
145 });
146 }
147 } else {
148 issues.push(SsmlValidationIssue {
149 issue_type: SsmlIssueType::Error,
150 message: format!("Unexpected closing tag: </{}>", tag_name),
151 line: line_num + 1,
152 column: line.find(&cap[0]).unwrap_or(0) + 1,
153 suggestion: Some(
154 "Remove this closing tag or add matching opening tag".to_string(),
155 ),
156 });
157 }
158 } else {
159 tag_stack.push(tag_name.to_string());
160 }
161 }
162 }
163
164 let line_count = ssml.lines().count();
166 let last_line_len = ssml.lines().last().map(|l| l.len()).unwrap_or(0);
167
168 for unclosed_tag in tag_stack {
169 issues.push(SsmlValidationIssue {
170 issue_type: SsmlIssueType::Error,
171 message: format!("Unclosed tag: <{}>", unclosed_tag),
172 line: line_count,
173 column: last_line_len,
174 suggestion: Some(format!("Add closing tag: </{}>", unclosed_tag)),
175 });
176 }
177
178 Ok(issues)
179 }
180
181 fn validate_attributes(&self, ssml: &str) -> Result<Vec<SsmlValidationIssue>> {
183 let mut issues = Vec::new();
184
185 for (line_num, line) in ssml.lines().enumerate() {
186 if let Some(cap) = RE_PROSODY_TAG.captures(line) {
187 let attributes = &cap[1];
188
189 if let Some(rate_match) = RE_RATE.captures(attributes) {
191 let rate_value = &rate_match[1];
192 if !self.is_valid_prosody_rate(rate_value) {
193 issues.push(SsmlValidationIssue {
194 issue_type: SsmlIssueType::Warning,
195 message: format!("Invalid prosody rate: '{rate_value}'"),
196 line: line_num + 1,
197 column: line.find(rate_value).unwrap_or(0) + 1,
198 suggestion: Some("Use values like: x-slow, slow, medium, fast, x-fast, or percentage/Hz values".to_string()),
199 });
200 }
201 }
202
203 if let Some(pitch_match) = RE_PITCH.captures(attributes) {
205 let pitch_value = &pitch_match[1];
206 if !self.is_valid_prosody_pitch(pitch_value) {
207 issues.push(SsmlValidationIssue {
208 issue_type: SsmlIssueType::Warning,
209 message: format!("Invalid prosody pitch: '{pitch_value}'"),
210 line: line_num + 1,
211 column: line.find(pitch_value).unwrap_or(0) + 1,
212 suggestion: Some("Use values like: x-low, low, medium, high, x-high, or Hz/semitone values".to_string()),
213 });
214 }
215 }
216
217 if let Some(volume_match) = RE_VOLUME.captures(attributes) {
219 let volume_value = &volume_match[1];
220 if !self.is_valid_prosody_volume(volume_value) {
221 issues.push(SsmlValidationIssue {
222 issue_type: SsmlIssueType::Warning,
223 message: format!("Invalid prosody volume: '{volume_value}'"),
224 line: line_num + 1,
225 column: line.find(volume_value).unwrap_or(0) + 1,
226 suggestion: Some("Use values like: silent, x-soft, soft, medium, loud, x-loud, or dB values".to_string()),
227 });
228 }
229 }
230 }
231 }
232
233 Ok(issues)
234 }
235
236 fn is_valid_prosody_rate(&self, value: &str) -> bool {
238 matches!(value, "x-slow" | "slow" | "medium" | "fast" | "x-fast")
239 || value.ends_with('%')
240 || value.ends_with("Hz")
241 || value.parse::<f32>().is_ok()
242 }
243
244 fn is_valid_prosody_pitch(&self, value: &str) -> bool {
246 matches!(value, "x-low" | "low" | "medium" | "high" | "x-high")
247 || value.ends_with("Hz")
248 || value.ends_with("st")
249 || value.starts_with('+')
250 || value.starts_with('-')
251 || value.parse::<f32>().is_ok()
252 }
253
254 fn is_valid_prosody_volume(&self, value: &str) -> bool {
256 matches!(
257 value,
258 "silent" | "x-soft" | "soft" | "medium" | "loud" | "x-loud"
259 ) || value.ends_with("dB")
260 || value.starts_with('+')
261 || value.starts_with('-')
262 || value.parse::<f32>().is_ok()
263 }
264
265 pub fn to_plain_text(&self, ssml: &str) -> String {
267 let text = RE_TAG_REMOVE.replace_all(ssml, "");
269
270 let text = RE_WHITESPACE.replace_all(&text, " ");
272
273 text.trim().to_string()
274 }
275
276 pub fn extract_synthesis_params(&self, ssml: &str) -> SsmlSynthesisParams {
278 let mut params = SsmlSynthesisParams::default();
279
280 if let Some(voice_match) = RE_VOICE_NAME.captures(ssml) {
282 params.voice = Some(voice_match[1].to_string());
283 }
284
285 if let Some(prosody_match) = RE_PROSODY_TAG.captures(ssml) {
287 let attributes = &prosody_match[1];
288
289 if let Some(rate_match) = RE_RATE.captures(attributes) {
290 params.speaking_rate = self.parse_rate_value(&rate_match[1]);
291 }
292
293 if let Some(pitch_match) = RE_PITCH.captures(attributes) {
294 params.pitch_shift = self.parse_pitch_value(&pitch_match[1]);
295 }
296
297 if let Some(volume_match) = RE_VOLUME.captures(attributes) {
298 params.volume_gain = self.parse_volume_value(&volume_match[1]);
299 }
300 }
301
302 params
303 }
304
305 fn parse_rate_value(&self, value: &str) -> Option<f32> {
307 match value {
308 "x-slow" => Some(0.5),
309 "slow" => Some(0.75),
310 "medium" => Some(1.0),
311 "fast" => Some(1.25),
312 "x-fast" => Some(1.5),
313 _ => {
314 if value.ends_with('%') {
315 value
316 .trim_end_matches('%')
317 .parse::<f32>()
318 .ok()
319 .map(|v| v / 100.0)
320 } else {
321 value.parse::<f32>().ok()
322 }
323 }
324 }
325 }
326
327 fn parse_pitch_value(&self, value: &str) -> Option<f32> {
329 match value {
330 "x-low" => Some(-6.0),
331 "low" => Some(-3.0),
332 "medium" => Some(0.0),
333 "high" => Some(3.0),
334 "x-high" => Some(6.0),
335 _ => {
336 if value.ends_with("st") {
337 value.trim_end_matches("st").parse::<f32>().ok()
338 } else if value.ends_with("Hz") {
339 value.trim_end_matches("Hz").parse::<f32>().ok().map(|hz| {
341 (hz - 200.0) / 20.0
343 })
344 } else {
345 value.parse::<f32>().ok()
346 }
347 }
348 }
349 }
350
351 fn parse_volume_value(&self, value: &str) -> Option<f32> {
353 match value {
354 "silent" => Some(-60.0),
355 "x-soft" => Some(-20.0),
356 "soft" => Some(-10.0),
357 "medium" => Some(0.0),
358 "loud" => Some(6.0),
359 "x-loud" => Some(12.0),
360 _ => {
361 if value.ends_with("dB") {
362 value.trim_end_matches("dB").parse::<f32>().ok()
363 } else {
364 value.parse::<f32>().ok()
365 }
366 }
367 }
368 }
369}
370
/// A single finding produced while validating an SSML document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SsmlValidationIssue {
    /// Severity of the finding.
    pub issue_type: SsmlIssueType,
    /// Human-readable description of the problem.
    pub message: String,
    /// Line the finding refers to.
    pub line: usize,
    /// Column the finding refers to.
    pub column: usize,
    /// Optional hint on how to fix the problem.
    pub suggestion: Option<String>,
}

/// Severity of an [`SsmlValidationIssue`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SsmlIssueType {
    /// The document is invalid.
    Error,
    /// The document is usable but contains something questionable.
    Warning,
    /// Informational note.
    Info,
}
388
/// Synthesis settings read from an SSML document; every field is `None` when
/// the document does not specify it.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct SsmlSynthesisParams {
    /// Name of the requested voice.
    pub voice: Option<String>,
    /// Requested speaking rate.
    pub speaking_rate: Option<f32>,
    /// Requested pitch shift.
    pub pitch_shift: Option<f32>,
    /// Requested volume gain.
    pub volume_gain: Option<f32>,
}
397
/// Helpers for building SSML fragments from plain values.
pub mod utils {
    /// Wraps `text` in `<speak>...</speak>` unless it already starts with
    /// `<speak` (ignoring leading whitespace). `text` is inserted verbatim;
    /// no XML escaping is applied.
    pub fn wrap_in_speak(text: &str) -> String {
        if text.trim_start().starts_with("<speak") {
            text.to_string()
        } else {
            format!("<speak>{}</speak>", text)
        }
    }

    /// Wraps `text` in a `<prosody>` element carrying the given settings and
    /// then in `<speak>`.
    ///
    /// * `rate` is reduced to a keyword: below 1.0 is `slow`, above 1.0 is
    ///   `fast`, anything else is `medium`.
    /// * `pitch` is written as a signed semitone change (`+3st`, `-2st`).
    /// * `volume` is written as a signed decibel change (`+6dB`, `-10dB`).
    ///
    /// The explicit sign is required by SSML for relative pitch and volume
    /// changes. Non-finite pitch or volume values are skipped. When no
    /// attribute remains, the text is only wrapped in `<speak>`.
    pub fn with_prosody(
        text: &str,
        rate: Option<f32>,
        pitch: Option<f32>,
        volume: Option<f32>,
    ) -> String {
        let mut prosody_attrs: Vec<String> = Vec::with_capacity(3);

        if let Some(rate) = rate {
            let keyword = if rate < 1.0 {
                "slow"
            } else if rate > 1.0 {
                "fast"
            } else {
                "medium"
            };
            prosody_attrs.push(format!("rate=\"{}\"", keyword));
        }

        if let Some(pitch) = pitch.filter(|p| p.is_finite()) {
            prosody_attrs.push(format!("pitch=\"{:+}st\"", pitch));
        }

        if let Some(volume) = volume.filter(|v| v.is_finite()) {
            prosody_attrs.push(format!("volume=\"{:+}dB\"", volume));
        }

        if prosody_attrs.is_empty() {
            wrap_in_speak(text)
        } else {
            wrap_in_speak(&format!(
                "<prosody {}>{}</prosody>",
                prosody_attrs.join(" "),
                text
            ))
        }
    }

    /// Returns a self-closing `<break>` tag with `time` as its `time` attribute.
    pub fn add_break(time: &str) -> String {
        format!("<break time=\"{}\"/>", time)
    }

    /// Wraps `text` in an `<emphasis>` element with the given `level`.
    pub fn add_emphasis(text: &str, level: &str) -> String {
        format!("<emphasis level=\"{}\">{}</emphasis>", level, text)
    }
}
462
463pub fn process_ssml(text: &str) -> crate::error::Result<String> {
466 let processor = SsmlProcessor::new();
467
468 if !processor.is_ssml(text) {
470 return Ok(utils::wrap_in_speak(text));
472 }
473
474 let issues = processor.validate(text)?;
476
477 let errors: Vec<_> = issues
479 .iter()
480 .filter(|issue| matches!(issue.issue_type, SsmlIssueType::Error))
481 .collect();
482
483 if !errors.is_empty() {
484 let error_messages: Vec<String> = errors
485 .iter()
486 .map(|error| format!("Line {}: {}", error.line, error.message))
487 .collect();
488 return Err(crate::error::CliError::ValidationError(format!(
489 "SSML validation failed:\n{}",
490 error_messages.join("\n")
491 )));
492 }
493
494 Ok(text.to_string())
497}
498
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_ssml` needs both a leading `<` and a closing tag somewhere.
    #[test]
    fn test_is_ssml() {
        let detector = SsmlProcessor::new();
        let cases = [
            ("<speak>Hello</speak>", true),
            (" <voice>Text</voice>", true),
            ("Plain text", false),
            ("Text with <emphasis> but no closing", false),
        ];

        for (input, expected) in cases {
            assert_eq!(detector.is_ssml(input), expected, "is_ssml({:?})", input);
        }
    }

    /// Nested markup is stripped and only the spoken words remain.
    #[test]
    fn test_to_plain_text() {
        let markup =
            "<speak><prosody rate=\"slow\">Hello <emphasis>world</emphasis></prosody></speak>";

        assert_eq!(SsmlProcessor::new().to_plain_text(markup), "Hello world");
    }

    /// Plain text gets wrapped; an existing `<speak>` document is left alone.
    #[test]
    fn test_wrap_in_speak() {
        let already_wrapped = "<speak>Hello</speak>";

        assert_eq!(utils::wrap_in_speak("Hello"), already_wrapped);
        assert_eq!(utils::wrap_in_speak(already_wrapped), already_wrapped);
    }

    /// Voice name and prosody keywords are turned into numeric parameters.
    #[test]
    fn test_extract_synthesis_params() {
        let markup = r#"<speak><voice name="female-voice"><prosody rate="fast" pitch="high" volume="loud">Hello</prosody></voice></speak>"#;

        let SsmlSynthesisParams {
            voice,
            speaking_rate,
            pitch_shift,
            volume_gain,
        } = SsmlProcessor::new().extract_synthesis_params(markup);

        assert_eq!(voice, Some("female-voice".to_string()));
        assert_eq!(speaking_rate, Some(1.25));
        assert_eq!(pitch_shift, Some(3.0));
        assert_eq!(volume_gain, Some(6.0));
    }
}