1use crate::ast::{AstNode, NodeType};
2use crate::error::Result;
3use crate::formatters::base::{Formatter, FormatterOptions};
4
/// Attributes of a single tag as `(name, value)` pairs.
///
/// A vector of pairs rather than a map, so the pairs keep the order in which
/// they were pushed.
pub type TagAttrs = Vec<(String, String)>;

/// A tag name together with that tag's attributes.
pub type TagInfo = (String, TagAttrs);
7
8pub fn attrs_insert(attrs: &mut TagAttrs, key: &str, value: String) {
9 if let Some(entry) = attrs.iter_mut().find(|(k, _)| k == key) {
10 entry.1 = value;
11 } else {
12 attrs.push((key.to_string(), value));
13 }
14}
15
16pub fn attrs_merge(existing: &mut TagAttrs, new: TagAttrs) {
17 for (k, v) in new {
18 attrs_insert(existing, &k, v);
19 }
20}
21
22pub fn attrs_get<'a>(attrs: &'a TagAttrs, key: &str) -> Option<&'a str> {
23 attrs
24 .iter()
25 .find(|(k, _)| k == key)
26 .map(|(_, v)| v.as_str())
27}
28
29pub struct SsmlFormatterBase {
30 options: FormatterOptions,
31 tag_sort_order: Vec<String>,
32}
33
34impl SsmlFormatterBase {
35 pub fn new(options: FormatterOptions) -> Self {
36 let tag_sort_order = Self::create_default_tag_order();
37
38 Self {
39 options,
40 tag_sort_order,
41 }
42 }
43
44 fn create_default_tag_order() -> Vec<String> {
45 vec![
46 "emphasis".to_string(),
47 "say-as".to_string(),
48 "prosody".to_string(),
49 "voice".to_string(),
50 "lang".to_string(),
51 "sub".to_string(),
52 "phoneme".to_string(),
53 "amazon:effect".to_string(),
54 "amazon:emotion".to_string(),
55 "amazon:domain".to_string(),
56 ]
57 }
58
59 pub fn format_node_internal(&self, node: &AstNode) -> Result<String> {
60 self.format_node_with_tags(node)
61 }
62
63 fn capitalize_voice_name(name: &str) -> String {
64 let valid_names = [
65 "Ivy",
66 "Joanna",
67 "Joey",
68 "Justin",
69 "Kendra",
70 "Kimberly",
71 "Matthew",
72 "Salli",
73 "Brian",
74 "Amy",
75 "Emma",
76 "Geraint",
77 "Russell",
78 "Nicole",
79 "Celine",
80 "Mathieu",
81 "Dora",
82 "Victor",
83 "Tatyana",
84 "Maxim",
85 "Hans",
86 "Marlene",
87 "Vicki",
88 "Aditi",
89 "Karl",
90 "Giorgio",
91 "Carla",
92 "Bianca",
93 "Lucia",
94 "Mizuki",
95 "Takumi",
96 "Vitoria",
97 "Ricardo",
98 "Ines",
99 "Cristiano",
100 "Lea",
101 "Zhiyu",
102 "Naja",
103 "Mads",
104 "Gwyneth",
105 "Lotte",
106 "Ruben",
107 "Ewa",
108 "Filiz",
109 "Penelope",
110 "Lupe",
111 "Mia",
112 "Conchita",
113 "Enrique",
114 "Miguel",
115 "Penny",
116 "Astrid",
117 "Bjorn",
118 "Sofia",
119 "Kasper",
120 "Seoyeon",
121 "Kendra",
122 "Salli",
123 "Aria",
124 "Jenny",
125 "Guy",
126 "Davis",
127 "Amber",
128 "Ana",
129 "Andrew",
130 "Christopher",
131 "Eric",
132 "Tony",
133 ];
134 let lower = name.to_lowercase();
135 for valid in &valid_names {
136 if valid.to_lowercase() == lower {
137 return valid.to_string();
138 }
139 }
140 name.to_string()
141 }
142
143 fn is_valid_voice_name(name: &str) -> bool {
144 let valid_names = [
145 "Ivy",
146 "Joanna",
147 "Joey",
148 "Justin",
149 "Kendra",
150 "Kimberly",
151 "Matthew",
152 "Salli",
153 "Brian",
154 "Amy",
155 "Emma",
156 "Geraint",
157 "Russell",
158 "Nicole",
159 "Celine",
160 "Mathieu",
161 "Dora",
162 "Victor",
163 "Tatyana",
164 "Maxim",
165 "Hans",
166 "Marlene",
167 "Vicki",
168 "Aditi",
169 "Karl",
170 "Giorgio",
171 "Carla",
172 "Bianca",
173 "Lucia",
174 "Mizuki",
175 "Takumi",
176 "Vitoria",
177 "Ricardo",
178 "Ines",
179 "Cristiano",
180 "Lea",
181 "Zhiyu",
182 "Naja",
183 "Mads",
184 "Gwyneth",
185 "Lotte",
186 "Ruben",
187 "Ewa",
188 "Filiz",
189 "Penelope",
190 "Lupe",
191 "Mia",
192 "Conchita",
193 "Enrique",
194 "Miguel",
195 "Penny",
196 "Astrid",
197 "Bjorn",
198 "Sofia",
199 "Kasper",
200 "Seoyeon",
201 "Aria",
202 "Jenny",
203 "Guy",
204 "Davis",
205 "Amber",
206 "Ana",
207 "Andrew",
208 "Christopher",
209 "Eric",
210 "Tony",
211 ];
212 valid_names.contains(&name)
213 }
214
215 pub fn format_node_with_tags(&self, node: &AstNode) -> Result<String> {
216 match node.node_type {
217 NodeType::Document => self.format_document(node),
218 NodeType::Paragraph => self.format_paragraph(node),
219 NodeType::SimpleLine => self.format_simple_line(node),
220 NodeType::EmptyLine => self.format_empty_line(node),
221 NodeType::Section => self.format_section(node),
222 NodeType::PlainText => Ok(node.text.clone()),
223 NodeType::ShortBreak => self.format_short_break(node),
224 NodeType::Break => self.format_break(node),
225 NodeType::ShortEmphasisModerate => self.format_emphasis(node, "moderate"),
226 NodeType::ShortEmphasisStrong => self.format_emphasis(node, "strong"),
227 NodeType::ShortEmphasisNone => self.format_emphasis(node, "none"),
228 NodeType::ShortEmphasisReduced => self.format_emphasis(node, "reduced"),
229 NodeType::TextModifier => self.format_text_modifier(node),
230 NodeType::Audio => self.format_audio(node),
231 NodeType::Mark => self.format_mark(node),
232 NodeType::ShortIpa => self.format_ipa(node),
233 NodeType::BareIpa => self.format_bare_ipa(node),
234 NodeType::ShortSub => self.format_short_sub(node),
235 _ => Ok(node.text.clone()),
236 }
237 }
238
239 fn format_document(&self, node: &AstNode) -> Result<String> {
240 let mut content = String::new();
241 let mut children_iter = node.children.iter().peekable();
242
243 while let Some(child) = children_iter.next() {
244 if child.node_type == NodeType::Section {
245 let mut section_content_raw = String::new();
246 while let Some(next_child) = children_iter.peek() {
247 if next_child.node_type == NodeType::Section {
248 break;
249 }
250 let next_child = children_iter.next().unwrap();
251 section_content_raw.push_str(&self.format_node_with_tags(next_child)?);
252 }
253 let had_leading_newline = section_content_raw.starts_with('\n');
254 let section_content = if had_leading_newline {
255 §ion_content_raw[1..]
256 } else {
257 §ion_content_raw
258 };
259
260 let section_open = self.format_node_with_tags(child)?;
261 let section_close = if !section_open.is_empty() {
262 self.format_section_close(child)?
263 } else {
264 String::new()
265 };
266
267 if !section_open.is_empty() {
268 content.push_str(§ion_open);
269 if had_leading_newline {
270 content.push('\n');
271 }
272 content.push_str(section_content);
273 content.push_str(§ion_close);
274 if had_leading_newline {
275 content.push('\n');
276 }
277 } else {
278 content.push_str(section_content);
279 }
280 } else {
281 content.push_str(&self.format_node_with_tags(child)?);
282 }
283 }
284
285 if self.options.include_speak_tag {
286 let trimmed = content.trim_end_matches('\n');
287 Ok(format!("<speak>\n{}\n</speak>", trimmed))
288 } else {
289 Ok(content)
290 }
291 }
292
293 fn format_paragraph(&self, node: &AstNode) -> Result<String> {
294 let mut content = String::new();
295 for child in &node.children {
296 content.push_str(&self.format_node_with_tags(child)?);
297 }
298 if self.options.include_paragraph_tag {
299 Ok(format!("<p>{}</p>", content))
300 } else {
301 Ok(content)
302 }
303 }
304
305 fn format_simple_line(&self, node: &AstNode) -> Result<String> {
306 let mut content = String::new();
307 for child in &node.children {
308 content.push_str(&self.format_node_with_tags(child)?);
309 }
310 Ok(content)
311 }
312
313 fn format_empty_line(&self, _node: &AstNode) -> Result<String> {
314 if self.options.preserve_empty_lines {
315 Ok("\n".to_string())
316 } else {
317 Ok(String::new())
318 }
319 }
320
321 fn format_section(&self, node: &AstNode) -> Result<String> {
322 let mut tags: Vec<TagInfo> = Vec::new();
323
324 if let Some(style) = node.attributes.get("style") {
325 if style != "defaults" {
326 if let Some(tag_info) = self.attribute_to_tag(style, "") {
327 tags.push(tag_info);
328 }
329 }
330 }
331
332 for key in &node.attribute_keys {
333 let value = match node.attributes.get(key) {
334 Some(v) => v,
335 None => continue,
336 };
337 if key == "style" {
338 continue;
339 }
340 if let Some(tag_info) = self.attribute_to_tag(key, value) {
341 let tag_name = tag_info.0.clone();
342 if tag_name == "prosody" {
343 if let Some(existing) = tags.iter_mut().find(|(name, _)| name == "prosody") {
344 attrs_merge(&mut existing.1, tag_info.1);
345 continue;
346 }
347 }
348 tags.push(tag_info);
349 }
350 }
351
352 let section_tag_order = ["voice", "lang", "prosody", "emphasis"];
353 tags.sort_by_key(|(tag_name, _)| {
354 section_tag_order
355 .iter()
356 .position(|t| t == tag_name)
357 .unwrap_or(usize::MAX)
358 });
359
360 if tags.is_empty() {
361 return Ok(String::new());
362 }
363
364 let mut result = String::new();
365 for (i, (tag_name, attrs)) in tags.iter().enumerate() {
366 let attr_string = format_attr_string_ordered(tag_name, attrs);
367 if i > 0 {
368 result.push('\n');
369 }
370 if attr_string.is_empty() {
371 result.push_str(&format!("<{}>", tag_name));
372 } else {
373 result.push_str(&format!("<{} {}>", tag_name, attr_string));
374 }
375 }
376 Ok(result)
377 }
378
379 pub fn format_section_close(&self, node: &AstNode) -> Result<String> {
380 let mut tags: Vec<TagInfo> = Vec::new();
381
382 if let Some(style) = node.attributes.get("style") {
383 if style != "defaults" {
384 if let Some(tag_info) = self.attribute_to_tag(style, "") {
385 tags.push(tag_info);
386 }
387 }
388 }
389
390 for key in &node.attribute_keys {
391 let value = match node.attributes.get(key) {
392 Some(v) => v,
393 None => continue,
394 };
395 if key == "style" {
396 continue;
397 }
398 if let Some(tag_info) = self.attribute_to_tag(key, value) {
399 let tag_name = tag_info.0.clone();
400 if tag_name == "prosody" {
401 if let Some(existing) = tags.iter_mut().find(|(name, _)| name == "prosody") {
402 attrs_merge(&mut existing.1, tag_info.1);
403 continue;
404 }
405 }
406 tags.push(tag_info);
407 }
408 }
409
410 let section_tag_order = ["voice", "lang", "prosody", "emphasis"];
411 tags.sort_by_key(|(tag_name, _)| {
412 section_tag_order
413 .iter()
414 .position(|t| t == tag_name)
415 .unwrap_or(usize::MAX)
416 });
417
418 if tags.is_empty() {
419 return Ok(String::new());
420 }
421
422 let mut result = String::new();
423 for (i, (tag_name, _)) in tags.iter().rev().enumerate() {
424 result.push_str(&format!("</{}>", tag_name));
425 if i < tags.len() - 1 {
426 result.push('\n');
427 }
428 }
429 Ok(result)
430 }
431
432 fn format_short_break(&self, node: &AstNode) -> Result<String> {
433 let time = node.text.trim_start_matches('[').trim_end_matches(']');
434 Ok(format!("<break time=\"{}\"/>", time))
435 }
436
437 fn format_break(&self, node: &AstNode) -> Result<String> {
438 let strength = node
439 .attributes
440 .get("strength")
441 .unwrap_or(&node.text)
442 .clone();
443 Ok(format!("<break strength=\"{}\"/>", strength))
444 }
445
446 fn format_emphasis(&self, node: &AstNode, level: &str) -> Result<String> {
447 Ok(format!(
448 "<emphasis level=\"{}\">{}</emphasis>",
449 level,
450 self.escape_xml(&node.text)
451 ))
452 }
453
454 fn format_text_modifier(&self, node: &AstNode) -> Result<String> {
455 let mut tags: Vec<TagInfo> = Vec::new();
456 let mut last_say_as: Option<TagInfo> = None;
457
458 for key in &node.attribute_keys {
459 let value = match node.attributes.get(key) {
460 Some(v) => v,
461 None => continue,
462 };
463 if let Some(tag_info) = self.attribute_to_tag(key, value) {
464 let tag_name = tag_info.0.clone();
465 if tag_name == "prosody" {
466 if let Some(existing) = tags.iter_mut().find(|(name, _)| name == "prosody") {
467 attrs_merge(&mut existing.1, tag_info.1);
468 continue;
469 }
470 }
471 if tag_name == "say-as" {
472 last_say_as = Some(tag_info);
473 continue;
474 }
475 tags.push(tag_info);
476 }
477 }
478
479 if let Some(say_as) = last_say_as {
480 tags.push(say_as);
481 }
482
483 if tags.is_empty() {
484 return Ok(node.text.clone());
485 }
486
487 self.apply_tags_to_text(&node.text, &tags)
488 }
489
490 fn format_audio(&self, node: &AstNode) -> Result<String> {
491 let src = node.attributes.get("src").unwrap_or(&String::new()).clone();
492 let caption = &node.text;
493 if caption.is_empty() {
494 Ok(format!("<audio src=\"{}\"/>", src))
495 } else {
496 Ok(format!(
497 "<audio src=\"{}\">\n<desc>{}</desc>\n</audio>",
498 src,
499 self.escape_xml(caption)
500 ))
501 }
502 }
503
504 fn format_mark(&self, node: &AstNode) -> Result<String> {
505 Ok(format!("<mark name=\"{}\"/>", self.escape_xml(&node.text)))
506 }
507
508 fn format_ipa(&self, node: &AstNode) -> Result<String> {
509 let phoneme = node
510 .attributes
511 .get("phoneme")
512 .unwrap_or(&String::new())
513 .clone();
514 if phoneme.is_empty() {
515 Ok(self.escape_xml(&node.text))
516 } else {
517 Ok(format!(
518 "<phoneme alphabet=\"ipa\" ph=\"{}\">{}</phoneme>",
519 self.escape_xml(&phoneme),
520 self.escape_xml(&node.text)
521 ))
522 }
523 }
524
525 fn format_bare_ipa(&self, node: &AstNode) -> Result<String> {
526 let phoneme = node.attributes.get("ph").unwrap_or(&node.text).clone();
527 Ok(format!(
528 "<phoneme alphabet=\"ipa\" ph=\"{}\">ipa</phoneme>",
529 self.escape_xml(&phoneme)
530 ))
531 }
532
533 fn format_short_sub(&self, node: &AstNode) -> Result<String> {
534 let alias = node
535 .attributes
536 .get("alias")
537 .unwrap_or(&String::new())
538 .clone();
539 if alias.is_empty() {
540 Ok(self.escape_xml(&node.text))
541 } else {
542 Ok(format!(
543 "<sub alias=\"{}\">{}</sub>",
544 self.escape_xml(&alias),
545 self.escape_xml(&node.text)
546 ))
547 }
548 }
549
550 pub fn attribute_to_tag(&self, key: &str, value: &str) -> Option<TagInfo> {
551 let mut attributes: TagAttrs = Vec::new();
552
553 match key.to_lowercase().as_str() {
554 "address" => Some(("say-as".to_string(), {
555 vec![("interpret-as".to_string(), "address".to_string())]
556 })),
557 "date" => Some(("say-as".to_string(), {
558 let mut attrs = vec![("interpret-as".to_string(), "date".to_string())];
559 if !value.is_empty() {
560 attrs.push(("format".to_string(), value.to_string()));
561 }
562 attrs
563 })),
564 "time" => Some(("say-as".to_string(), {
565 let mut attrs = Vec::new();
566 if !value.is_empty() {
567 attrs.push(("format".to_string(), value.to_string()));
568 }
569 attrs.push(("interpret-as".to_string(), "time".to_string()));
570 attrs
571 })),
572 "number" | "cardinal" => Some(("say-as".to_string(), {
573 vec![("interpret-as".to_string(), "number".to_string())]
574 })),
575 "ordinal" => Some(("say-as".to_string(), {
576 vec![("interpret-as".to_string(), "ordinal".to_string())]
577 })),
578 "characters" | "chars" | "digits" | "drc" => Some(("say-as".to_string(), {
579 vec![("interpret-as".to_string(), "characters".to_string())]
580 })),
581 "fraction" => Some(("say-as".to_string(), {
582 vec![("interpret-as".to_string(), "fraction".to_string())]
583 })),
584 "unit" => Some(("say-as".to_string(), {
585 vec![("interpret-as".to_string(), "unit".to_string())]
586 })),
587 "interjection" => Some(("say-as".to_string(), {
588 vec![("interpret-as".to_string(), "interjection".to_string())]
589 })),
590 "expletive" | "bleep" => Some(("say-as".to_string(), {
591 vec![("interpret-as".to_string(), "expletive".to_string())]
592 })),
593 "telephone" | "phone" => Some(("say-as".to_string(), {
594 vec![("interpret-as".to_string(), "telephone".to_string())]
595 })),
596 "ipa" => Some(("phoneme".to_string(), {
597 let mut attrs = vec![("alphabet".to_string(), "ipa".to_string())];
598 if !value.is_empty() {
599 attrs.push(("ph".to_string(), value.to_string()));
600 }
601 attrs
602 })),
603 "sub" => {
604 if !value.is_empty() {
605 attributes.push(("alias".to_string(), value.to_string()));
606 }
607 Some(("sub".to_string(), attributes))
608 }
609 "voice" => {
610 if value.is_empty() || value == "device" {
611 return None;
612 }
613 let name = Self::capitalize_voice_name(value);
614 if !Self::is_valid_voice_name(&name) {
615 return None;
616 }
617 attributes.push(("name".to_string(), name));
618 Some(("voice".to_string(), attributes))
619 }
620 "lang" => {
621 if !value.is_empty() {
622 attributes.push(("xml:lang".to_string(), value.to_string()));
623 }
624 Some(("lang".to_string(), attributes))
625 }
626 "rate" => {
627 let rate_val = if value.is_empty() { "medium" } else { value };
628 attributes.push(("rate".to_string(), rate_val.to_string()));
629 Some(("prosody".to_string(), attributes))
630 }
631 "pitch" => {
632 let pitch_val = if value.is_empty() { "medium" } else { value };
633 attributes.push(("pitch".to_string(), pitch_val.to_string()));
634 Some(("prosody".to_string(), attributes))
635 }
636 "volume" | "vol" => {
637 let vol_val = if value.is_empty() { "medium" } else { value };
638 attributes.push(("volume".to_string(), vol_val.to_string()));
639 Some(("prosody".to_string(), attributes))
640 }
641 "timbre" => {
642 let timbre_val = if value.is_empty() { "medium" } else { value };
643 attributes.push(("pitch".to_string(), timbre_val.to_string()));
644 Some(("prosody".to_string(), attributes))
645 }
646 "emphasis" => {
647 let level = if value.is_empty() { "moderate" } else { value };
648 attributes.push(("level".to_string(), level.to_string()));
649 Some(("emphasis".to_string(), attributes))
650 }
651 "whisper" => Some(("amazon:effect".to_string(), {
652 vec![("name".to_string(), "whispered".to_string())]
653 })),
654 "excited" => {
655 let lower_val = value.to_lowercase();
656 if value.is_empty() {
657 Some(("amazon:emotion".to_string(), {
658 vec![
659 ("name".to_string(), "excited".to_string()),
660 ("intensity".to_string(), "medium".to_string()),
661 ]
662 }))
663 } else if matches!(lower_val.as_str(), "low" | "medium" | "high") {
664 Some(("amazon:emotion".to_string(), {
665 vec![
666 ("name".to_string(), "excited".to_string()),
667 ("intensity".to_string(), lower_val),
668 ]
669 }))
670 } else {
671 None
672 }
673 }
674 "disappointed" => {
675 let lower_val = value.to_lowercase();
676 if value.is_empty() {
677 Some(("amazon:emotion".to_string(), {
678 vec![
679 ("name".to_string(), "disappointed".to_string()),
680 ("intensity".to_string(), "medium".to_string()),
681 ]
682 }))
683 } else if matches!(lower_val.as_str(), "low" | "medium" | "high") {
684 Some(("amazon:emotion".to_string(), {
685 vec![
686 ("name".to_string(), "disappointed".to_string()),
687 ("intensity".to_string(), lower_val),
688 ]
689 }))
690 } else {
691 None
692 }
693 }
694 "dj" => Some(("amazon:domain".to_string(), {
695 vec![("name".to_string(), "music".to_string())]
696 })),
697 "newscaster" => Some(("amazon:domain".to_string(), {
698 vec![("name".to_string(), "news".to_string())]
699 })),
700 _ => None,
701 }
702 }
703
704 pub fn apply_tags_to_text(&self, text: &str, tags: &[TagInfo]) -> Result<String> {
705 let mut current_text = text.to_string();
706
707 let mut sorted_tags = tags.to_vec();
708 sorted_tags.sort_by_key(|(tag_name, _)| {
709 self.tag_sort_order
710 .iter()
711 .position(|t| t == tag_name)
712 .unwrap_or(usize::MAX)
713 });
714
715 for (tag_name, attributes) in sorted_tags.iter().rev() {
716 let attr_string = format_attr_string_ordered(tag_name, attributes);
717
718 if attr_string.is_empty() {
719 current_text = format!("<{}>{}</{}>", tag_name, current_text, tag_name);
720 } else {
721 current_text = format!(
722 "<{} {}>{}</{}>",
723 tag_name, attr_string, current_text, tag_name
724 );
725 }
726 }
727
728 Ok(current_text)
729 }
730
731 pub fn escape_xml(&self, text: &str) -> String {
732 text.replace('&', "&")
733 .replace('<', "<")
734 .replace('>', ">")
735 }
736}
737
738pub fn format_attr_string_ordered(tag_name: &str, attributes: &TagAttrs) -> String {
739 let fixed_order: Vec<&str> = match tag_name {
740 "say-as" => vec!["interpret-as", "format"],
741 "phoneme" => vec!["alphabet", "ph"],
742 "voice" => vec!["name"],
743 "lang" => vec!["xml:lang"],
744 "emphasis" => vec!["level"],
745 "amazon:effect" => vec!["name"],
746 "amazon:emotion" => vec!["name", "intensity"],
747 "amazon:domain" => vec!["name"],
748 "mstts:express-as" => vec!["style"],
749 "sub" => vec!["alias"],
750 "prosody" => vec![],
751 "google:style" => vec![],
752 _ => vec![],
753 };
754
755 if fixed_order.is_empty() && !attributes.is_empty() {
756 let mut seen = std::collections::HashSet::new();
757 let mut parts: Vec<String> = Vec::new();
758 for (key, value) in attributes {
759 if seen.insert(key.clone()) {
760 parts.push(format!("{}=\"{}\"", key, value));
761 }
762 }
763 return parts.join(" ");
764 }
765
766 let mut parts: Vec<String> = Vec::new();
767 let mut seen = std::collections::HashSet::new();
768 for key in &fixed_order {
769 if let Some(value) = attrs_get(attributes, key) {
770 if seen.insert(key.to_string()) {
771 parts.push(format!("{}=\"{}\"", key, value));
772 }
773 }
774 }
775 for (key, value) in attributes {
776 if !fixed_order.contains(&key.as_str()) && seen.insert(key.clone()) {
777 parts.push(format!("{}=\"{}\"", key, value));
778 }
779 }
780 parts.join(" ")
781}
782
783impl Formatter for SsmlFormatterBase {
784 fn format(&self, ast: &AstNode) -> Result<String> {
785 self.format_node_with_tags(ast)
786 }
787
788 fn format_node(&self, node: &AstNode) -> Result<String> {
789 self.format_node_with_tags(node)
790 }
791}