#![doc = include_str!("../README.md")]
#![deny(
warnings,
bad_style,
dead_code,
improper_ctypes,
non_shorthand_field_patterns,
no_mangle_generic_items,
overflowing_literals,
path_statements,
patterns_in_fns_without_body,
unconditional_recursion,
unused,
unused_allocation,
unused_comparisons,
unused_parens,
while_true,
missing_debug_implementations,
missing_docs,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
unused_qualifications,
unused_results,
unreachable_pub,
deprecated,
unknown_lints,
unreachable_code,
unused_mut
)]
use chumsky::prelude::*;
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, time::Duration};
mod break_strength;
mod ser;
pub use break_strength::BreakStrength;
mod functions;
pub use functions::*;
/// A parsed SSML document.
///
/// [`from_str`] produces this type and [`to_string`] consumes it.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct SSML {
/// Top-level nodes of the document, in the order the parser produced them.
pub elements: Vec<SsmlElement>,
}
/// One node of an SSML document tree.
///
/// With serde the enum is adjacently tagged: the variant name is stored under
/// `"type"` and the variant's payload under `"data"`.
///
/// When a value is built by the parser in this file, a `String` attribute field
/// holds the empty string if the attribute was not present, and an `Option`
/// attribute field holds `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", content = "data")]
pub enum SsmlElement {
/// A `<voice>` element.
Voice {
/// Value of the `name` attribute.
name: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<speak>` element.
Speak {
/// Value of the `version` attribute, if present.
version: Option<String>,
/// Value of the `xmlns` attribute, if present.
xmlns: Option<String>,
/// Value of the `xml:lang` attribute, if present.
lang: Option<String>,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<p>` element.
Paragraph {
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// An `<s>` element.
Sentence {
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<phoneme>` element.
Phoneme {
/// Value of the `alphabet` attribute.
alphabet: String,
/// Value of the `ph` attribute.
ph: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<say-as>` element.
SayAs {
/// Value of the `interpret-as` attribute.
interpret_as: String,
/// Value of the `format` attribute.
format: String,
/// Value of the `detail` attribute.
detail: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<sub>` element.
Sub {
/// Value of the `alias` attribute.
alias: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<prosody>` element.
Prosody {
/// Value of the `rate` attribute.
rate: String,
/// Value of the `pitch` attribute.
pitch: String,
/// Value of the `contour` attribute.
contour: String,
/// Value of the `range` attribute.
range: String,
/// Value of the `volume` attribute.
volume: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// An `<emphasis>` element.
Emphasis {
/// Value of the `level` attribute.
level: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<break>` element; it carries no children.
Break {
/// The `time` attribute as a duration. The parser leaves this `None`
/// when the attribute is absent or `duration_str::parse` rejects it.
time: Option<Duration>,
/// The `strength` attribute. The parser leaves this `None` when the
/// attribute is absent or does not parse as a [`BreakStrength`].
strength: Option<BreakStrength>,
},
/// A `<mark>` element; it carries no children.
Mark {
/// Value of the `name` attribute.
name: String,
},
/// An `<audio>` element.
Audio {
/// Value of the `src` attribute.
src: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<desc>` element.
Desc {
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A `<lexicon>` element; it carries no children.
LexiconUri {
/// Value of the `uri` attribute.
uri: String,
},
/// A `<lang>` element.
Lang {
/// Value of the `xml:lang` attribute.
xml_lang: String,
/// Nested nodes, in document order.
children: Vec<SsmlElement>,
},
/// A run of character data. The parser strips leading and trailing
/// whitespace before storing it.
Text(String),
}
/// Parses an attribute name and returns it as a `String`.
///
/// The first character must be an ASCII letter, `_`, `-` or `:`. It may be
/// followed by any number of ASCII letters, ASCII digits, `_`, `-` or `:`.
fn attr_ident() -> impl Parser<char, String, Error = Simple<char>> {
    // Punctuation accepted at every position of a name.
    fn is_name_punct(c: char) -> bool {
        matches!(c, '_' | '-' | ':')
    }

    let head = filter(|c: &char| c.is_ascii_alphabetic() || is_name_punct(*c));
    let tail = filter(|c: &char| c.is_ascii_alphanumeric() || is_name_punct(*c)).repeated();

    head.chain::<char, _, _>(tail).collect()
}
/// Parses a single attribute and returns it as a `(name, value)` pair.
///
/// Whitespace is allowed around the name and around the `=` sign. The value
/// may be delimited by double quotes (`name="value"`) or by single quotes
/// (`name='value'`); it runs up to the next occurrence of the opening
/// delimiter and is returned verbatim, without the quotes. Entity references
/// such as `&amp;` are not decoded.
fn attribute() -> impl Parser<char, (String, String), Error = Simple<char>> {
    let double_quoted = just('"')
        .ignore_then(none_of("\"").repeated().collect::<String>())
        .then_ignore(just('"'));
    // XML permits either quote character as the delimiter; the other one may
    // then appear unescaped inside the value.
    let single_quoted = just('\'')
        .ignore_then(none_of("'").repeated().collect::<String>())
        .then_ignore(just('\''));

    attr_ident()
        .padded()
        .then_ignore(just('=').padded())
        .then(double_quoted.or(single_quoted))
}
/// Builds the parser for a whole SSML document.
///
/// The accepted input is, in order:
///
/// 1. an optional `<?xml ... ?>` declaration, which is parsed and discarded;
/// 2. any number of nodes (recognised elements or text runs);
/// 3. end of input.
///
/// Elements that can hold children (`speak`, `voice`, `p`, `s`, `phoneme`,
/// `say-as`, `sub`, `prosody`, `emphasis`, `audio`, `desc`, `lang`) may be
/// written as `<tag ...>children</tag>` or in the empty-element form
/// `<tag .../>`, which yields no children. `break`, `mark` and `lexicon` never
/// take children and may be written as `<tag .../>` or `<tag ...></tag>`.
///
/// Any other tag makes the parse fail. Whitespace around tags is skipped and
/// text runs are trimmed.
fn ssml_parser() -> impl Parser<char, SSML, Error = Simple<char>> {
    // Value of `key`, or the empty string when the attribute was not written.
    fn attr_or_default(attrs: &HashMap<String, String>, key: &str) -> String {
        attrs.get(key).cloned().unwrap_or_default()
    }

    // Zero or more attributes gathered into a map. When a name is repeated the
    // last occurrence wins.
    let attributes = || attribute().padded().repeated().collect::<HashMap<_, _>>();

    // `<name attr="..." ...>`; yields the attribute map.
    let open_tag = |name: &'static str| {
        just('<')
            .padded()
            .ignore_then(just(name).padded())
            .ignore_then(attributes())
            .then_ignore(just('>'))
            .padded()
    };

    // `</name>`; yields nothing.
    let close_tag = |name: &'static str| {
        just("</")
            .padded()
            .ignore_then(just(name).padded())
            .then_ignore(just('>'))
            .to(())
            .padded()
    };

    // `<name attr="..." .../>`; yields the attribute map.
    let self_close_tag = |name: &'static str| {
        just('<')
            .ignore_then(just(name).padded())
            .ignore_then(attributes())
            .then_ignore(just("/>"))
            .padded()
    };

    // An element that never has children, in either spelling.
    let empty = |name: &'static str| {
        self_close_tag(name).or(open_tag(name).then_ignore(close_tag(name)))
    };

    // Everything up to the next `<`, with surrounding whitespace removed.
    // Because every tag parser already skips the whitespace around itself, the
    // trimmed result can only be empty when the whole input is whitespace.
    let text = none_of("<")
        .repeated()
        .at_least(1)
        .collect::<String>()
        .map(|txt| txt.trim().to_string())
        .map(SsmlElement::Text);

    // `<?xml version="1.0" ...?>`; its attributes are parsed but not kept.
    let xml_decl = just("<?xml")
        .padded()
        .ignore_then(attribute().padded().repeated())
        .then_ignore(just("?>").padded())
        .ignored()
        .padded();

    let document = recursive(|element| {
        // An element that may hold children. `<name .../>` is treated as
        // `<name ...></name>`, as XML defines for empty-element tags.
        let container = |name: &'static str| {
            open_tag(name)
                .then(element.clone().repeated())
                .then_ignore(close_tag(name))
                .or(self_close_tag(name).map(|attrs| (attrs, Vec::new())))
        };

        let speak_element = container("speak").map(|(attrs, children)| SsmlElement::Speak {
            version: attrs.get("version").cloned(),
            xmlns: attrs.get("xmlns").cloned(),
            lang: attrs.get("xml:lang").cloned(),
            children,
        });
        let voice_element = container("voice").map(|(attrs, children)| SsmlElement::Voice {
            name: attr_or_default(&attrs, "name"),
            children,
        });
        let paragraph_element =
            container("p").map(|(_, children)| SsmlElement::Paragraph { children });
        let sentence_element =
            container("s").map(|(_, children)| SsmlElement::Sentence { children });
        let phoneme_element = container("phoneme").map(|(attrs, children)| SsmlElement::Phoneme {
            alphabet: attr_or_default(&attrs, "alphabet"),
            ph: attr_or_default(&attrs, "ph"),
            children,
        });
        let say_as_element = container("say-as").map(|(attrs, children)| SsmlElement::SayAs {
            interpret_as: attr_or_default(&attrs, "interpret-as"),
            format: attr_or_default(&attrs, "format"),
            detail: attr_or_default(&attrs, "detail"),
            children,
        });
        let sub_element = container("sub").map(|(attrs, children)| SsmlElement::Sub {
            alias: attr_or_default(&attrs, "alias"),
            children,
        });
        let prosody_element = container("prosody").map(|(attrs, children)| SsmlElement::Prosody {
            rate: attr_or_default(&attrs, "rate"),
            pitch: attr_or_default(&attrs, "pitch"),
            contour: attr_or_default(&attrs, "contour"),
            range: attr_or_default(&attrs, "range"),
            volume: attr_or_default(&attrs, "volume"),
            children,
        });
        let emphasis_element =
            container("emphasis").map(|(attrs, children)| SsmlElement::Emphasis {
                level: attr_or_default(&attrs, "level"),
                children,
            });
        let audio_element = container("audio").map(|(attrs, children)| SsmlElement::Audio {
            src: attr_or_default(&attrs, "src"),
            children,
        });
        let desc_element = container("desc").map(|(_, children)| SsmlElement::Desc { children });
        let lang_element = container("lang").map(|(attrs, children)| SsmlElement::Lang {
            xml_lang: attr_or_default(&attrs, "xml:lang"),
            children,
        });

        let break_element = empty("break").map(|attrs| SsmlElement::Break {
            // Values that fail to parse are dropped rather than failing the document.
            time: attrs.get("time").and_then(|t| duration_str::parse(t).ok()),
            strength: attrs.get("strength").and_then(|s| s.parse().ok()),
        });
        let mark_element = empty("mark").map(|attrs| SsmlElement::Mark {
            name: attr_or_default(&attrs, "name"),
        });
        let lexicon_element = empty("lexicon").map(|attrs| SsmlElement::LexiconUri {
            uri: attr_or_default(&attrs, "uri"),
        });

        // Tag names are matched as plain prefixes (`s` also starts `sub` and
        // `say-as`, `p` also starts `phoneme` and `prosody`); a wrong guess is
        // undone by backtracking when the rest of the tag does not fit.
        // `text` comes last so that it only applies when no element matches.
        choice((
            speak_element,
            voice_element,
            paragraph_element,
            sentence_element,
            phoneme_element,
            say_as_element,
            sub_element,
            prosody_element,
            emphasis_element,
            audio_element,
            desc_element,
            lang_element,
            break_element,
            mark_element,
            lexicon_element,
            text,
        ))
    })
    .repeated()
    .collect::<Vec<_>>()
    .map(|elements| SSML { elements });

    xml_decl.or_not().ignore_then(document).then_ignore(end())
}
/// Error type returned by [`from_str`]: the list of `Simple<char>` errors
/// reported by the parser.
pub type ParseError = Vec<Simple<char>>;
/// Parses `input` as an SSML document.
///
/// # Errors
///
/// Returns the errors reported by the parser when `input` is not accepted by
/// the grammar, for example when it contains a tag the parser does not know.
pub fn from_str(input: impl AsRef<str>) -> Result<SSML, ParseError> {
    let source: &str = input.as_ref();
    let parser = ssml_parser();
    parser.parse(source)
}
/// Renders `ssml` as a `String` by delegating to `ser::to_ssml`.
pub fn to_string(ssml: &SSML) -> String {
ser::to_ssml(ssml)
}
#[cfg(test)]
mod documentation_examples {
    use super::*;

    // Depth-first search for the first text node that is a direct child of an
    // `<emphasis>` element.
    fn find_emphasized_text(element: &SsmlElement) -> Option<String> {
        match element {
            SsmlElement::Emphasis { children, .. } => children.iter().find_map(|child| {
                if let SsmlElement::Text(text) = child {
                    Some(text.clone())
                } else {
                    None
                }
            }),
            SsmlElement::Paragraph { children }
            | SsmlElement::Sentence { children }
            | SsmlElement::Voice { children, .. }
            | SsmlElement::Prosody { children, .. }
            | SsmlElement::Audio { children, .. }
            | SsmlElement::Lang { children, .. } => {
                children.iter().find_map(find_emphasized_text)
            }
            _ => None,
        }
    }

    #[test]
    fn example_parsing_and_traversing() {
        let input = r#"
<speak version="1.1" xml:lang="en-US">
<p>
<s>This is a <emphasis level="strong">important</emphasis> message.</s>
</p>
</speak>
"#;
        let ssml = from_str(input).expect("Failed to parse SSML");

        // Fail loudly on an unexpected root instead of silently skipping the
        // assertions below.
        match &ssml.elements[0] {
            SsmlElement::Speak {
                version,
                lang,
                children,
                ..
            } => {
                assert_eq!(version.as_deref(), Some("1.1"));
                assert_eq!(lang.as_deref(), Some("en-US"));

                let emphasized_text = children
                    .iter()
                    .find_map(find_emphasized_text)
                    .expect("No emphasized text found");
                assert_eq!(emphasized_text, "important");
            }
            other => panic!("expected a top-level <speak> element, got {:?}", other),
        }
    }

    #[test]
    fn example_constructing_ssml() {
        let ssml = SSML {
            elements: vec![SsmlElement::Speak {
                version: Some("1.1".to_string()),
                xmlns: Some("http://www.w3.org/2001/10/synthesis".to_string()),
                lang: Some("en-US".to_string()),
                children: vec![
                    SsmlElement::Paragraph {
                        children: vec![SsmlElement::Sentence {
                            children: vec![
                                SsmlElement::Text("Welcome to ".to_string()),
                                SsmlElement::Emphasis {
                                    level: "strong".to_string(),
                                    children: vec![SsmlElement::Text("SSML".to_string())],
                                },
                                SsmlElement::Text(" parsing!".to_string()),
                            ],
                        }],
                    },
                    SsmlElement::Break {
                        time: Some(Duration::from_millis(500)),
                        strength: Some(BreakStrength::Medium),
                    },
                ],
            }],
        };

        assert_eq!(ssml.elements.len(), 1);
        match &ssml.elements[0] {
            SsmlElement::Speak { children, .. } => assert_eq!(children.len(), 2),
            other => panic!("expected a top-level <speak> element, got {:?}", other),
        }
    }
}