use crate::decoder::TextChunk;
use crate::{liaison_possible, syllabify_text, syllables};
/// Renders a single word as HTML.
///
/// The word is split with [`syllables`] and the resulting syllables are
/// wrapped in alternating `syl-a` / `syl-b` spans inside one `word` span.
/// A word that yields no non-empty syllable produces an empty string.
pub fn render_word_html(word: &str) -> String {
    let parts = syllables(word);
    render_word_spans(&parts)
}
/// Renders a whole text as HTML.
///
/// The text is split with [`syllabify_text`]. Raw chunks are copied through
/// [`escape`]; word chunks become syllable spans. Before a word, an empty
/// `<span class="liaison" data-with="…">` marker is emitted when
/// [`liaison_possible`] accepts the pair (previous word, current word).
///
/// Any raw chunk containing at least one non-whitespace character forgets the
/// previous word, so no marker is ever emitted across it.
pub fn render_html(text: &str) -> String {
    // Markup is noticeably larger than the input; 4x is a rough starting guess.
    let mut html = String::with_capacity(text.len() * 4);
    // Concatenated syllables of the most recent word, if nothing but
    // whitespace has been seen since.
    let mut last_word: Option<String> = None;

    for chunk in &syllabify_text(text) {
        match chunk {
            TextChunk::Word(sylls) => {
                let current: String = sylls.concat();
                let liaison = match &last_word {
                    Some(prev) if liaison_possible(prev, &current) => {
                        Some(liaison_consonant_for(prev))
                    }
                    _ => None,
                };
                if let Some(consonant) = liaison {
                    html.push_str(r#"<span class="liaison" data-with=""#);
                    html.push_str(consonant);
                    html.push_str(r#""></span>"#);
                }
                html.push_str(&render_word_spans(sylls));
                last_word = Some(current);
            }
            TextChunk::Raw(raw) => {
                if raw.chars().any(|c| !c.is_whitespace()) {
                    last_word = None;
                }
                html.push_str(&escape(raw));
            }
        }
    }

    html
}
/// Wraps the syllables of one word in HTML spans.
///
/// Returns an empty string when there is no non-empty syllable (this includes
/// an empty slice). Otherwise every syllable, empty or not, gets its own span
/// whose class alternates by position: `syl syl-a` for even indices and
/// `syl syl-b` for odd ones. Syllable text goes through [`escape`].
fn render_word_spans(sylls: &[String]) -> String {
    let has_content = sylls.iter().any(|syl| !syl.is_empty());
    if !has_content {
        return String::new();
    }

    let mut html = String::from(r#"<span class="word">"#);
    // Endless a, b, a, b, … sequence paired with the syllables in order.
    let classes = ["syl syl-a", "syl syl-b"].iter().copied().cycle();
    for (syl, class) in sylls.iter().zip(classes) {
        html.push_str(r#"<span class=""#);
        html.push_str(class);
        html.push_str(r#"">"#);
        html.push_str(&escape(syl));
        html.push_str("</span>");
    }
    html.push_str("</span>");
    html
}
/// Picks the value of the liaison marker's `data-with` attribute from the
/// last character of the preceding word.
///
/// Only the final character is inspected, case-insensitively (the first
/// character of its lowercase mapping is used). Anything not listed below,
/// including an empty `prev`, falls back to `"z"`.
fn liaison_consonant_for(prev: &str) -> &'static str {
    let final_letter = prev
        .chars()
        .next_back()
        .and_then(|c| c.to_lowercase().next());

    match final_letter {
        Some('s' | 'x' | 'z') => "z",
        Some('d' | 't') => "t",
        Some('n') => "n",
        Some('p') => "p",
        Some('r') => "r",
        Some('g') => "k",
        _ => "z",
    }
}
/// Escapes a string for safe inclusion in HTML text or a quoted attribute.
///
/// The five markup-significant characters are replaced by entities:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&#39;`.
/// Every other character is copied unchanged.
fn escape(s: &str) -> String {
    // The result is at least as long as the input; it grows only when
    // something actually needs escaping.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            // Numeric form: `&apos;` is not defined in HTML 4.
            '\'' => out.push_str("&#39;"),
            _ => out.push(c),
        }
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A three-syllable word yields a, b, a classes inside one `word` span.
    #[test]
    fn mot_simple_3_syllabes() {
        let html = render_word_html("chocolat");
        assert_eq!(
            html,
            r#"<span class="word"><span class="syl syl-a">cho</span><span class="syl syl-b">co</span><span class="syl syl-a">lat</span></span>"#
        );
    }

    /// The class alternation always starts with `syl-a`.
    #[test]
    fn mot_alternance_demarre_a() {
        assert!(render_word_html("famille")
            .starts_with(r#"<span class="word"><span class="syl syl-a">fa</span>"#));
    }

    /// Spaces and punctuation between words are carried into the output.
    #[test]
    fn texte_preserve_ponctuation() {
        let html = render_html("le chat,");
        assert!(html.contains(r#">le</span>"#));
        assert!(html.contains(" "));
        assert!(html.ends_with(","));
    }

    /// "les hôtels": a `z` liaison marker sits between the two words.
    #[test]
    fn liaison_les_hotels_emet_span() {
        let html = render_html("les hôtels");
        assert!(
            html.contains(r#"<span class="liaison" data-with="z"></span>"#),
            "liaison 'z' attendue, got: {}",
            html
        );
        let pos_first_word = html.find("les").unwrap();
        let pos_liaison = html.find(r#"class="liaison""#).unwrap();
        let pos_second_word = html.find("ô").unwrap();
        assert!(pos_first_word < pos_liaison);
        assert!(pos_liaison < pos_second_word);
    }

    /// No marker before "héros".
    #[test]
    fn liaison_absente_h_aspire() {
        let html = render_html("les héros");
        assert!(!html.contains(r#"class="liaison""#));
    }

    /// No marker before a word starting with a consonant.
    #[test]
    fn liaison_absente_consonne_initiale() {
        let html = render_html("les chats");
        assert!(!html.contains(r#"class="liaison""#));
    }

    /// A previous word ending in `t` produces a `t` marker.
    #[test]
    fn liaison_consonne_t_pour_tout() {
        let html = render_html("tout ami");
        assert!(html.contains(r#"data-with="t""#), "got: {}", html);
    }

    /// A previous word ending in `n` produces an `n` marker.
    #[test]
    fn liaison_consonne_n_pour_en() {
        let html = render_html("en automne");
        assert!(html.contains(r#"data-with="n""#), "got: {}", html);
    }

    /// Punctuation between two words suppresses the marker.
    #[test]
    fn liaison_bloquee_par_virgule() {
        let html = render_html("les, hôtels");
        assert!(!html.contains(r#"class="liaison""#), "got: {}", html);
    }

    /// The same spelling is rendered for both the noun and the verb context.
    #[test]
    fn homographes_contexte_respecte() {
        let html_nom = render_html("le couvent");
        let html_verbe = render_html("elles couvent");
        assert!(
            html_nom.contains(r#">cou</span><span class="syl syl-b">vent</span>"#),
            "got: {}",
            html_nom
        );
        assert!(html_verbe.contains(">cou</span>"));
    }

    /// Markup-significant characters in the input come out as entities.
    #[test]
    fn html_echappe_caracteres_speciaux() {
        let html = render_html("a < b");
        // Checking for a bare `<` would always pass because of the `<span`
        // tags, so look for the entity itself.
        assert!(html.contains("&lt;"), "got: {}", html);
    }

    /// Empty input yields empty output.
    #[test]
    fn texte_vide() {
        assert_eq!(render_html(""), "");
    }

    /// An empty word renders as an empty string without panicking.
    #[test]
    fn mot_vide_ne_crash_pas() {
        assert_eq!(render_word_html(""), "");
    }
}