use crate::data::{LetterEntry, RuleKind, Special, AUTOMATON};
use crate::rules;
#[cfg(feature = "regex-full")]
use regex::Regex;
#[cfg(all(feature = "regex-lite", not(feature = "regex-full")))]
use regex_lite::Regex;
use std::collections::HashMap;
use std::sync::OnceLock;
#[cfg(not(any(feature = "regex-full", feature = "regex-lite")))]
compile_error!("one of the features `regex-full` or `regex-lite` must be enabled");
/// One element of the output produced by [`parse`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Phoneme {
    /// Phoneme code emitted for this step; empty when no rule produced a code.
    pub code: String,
    /// Number of characters of the word consumed by this step.
    pub step: usize,
}
/// Returns the shared table of compiled context patterns, keyed by pattern text.
///
/// The table is built on first use by walking every `RuleKind::Context` rule of
/// every `AUTOMATON` entry. A pattern is compiled only when its `has_*` flag is
/// set and its text is non-empty; a pattern shared by several rules is compiled
/// once.
///
/// # Panics
///
/// Panics while building the table if any pattern fails to compile.
fn regex_cache() -> &'static HashMap<&'static str, Regex> {
    static CACHE: OnceLock<HashMap<&'static str, Regex>> = OnceLock::new();

    /// Compiles `pattern` into `table` unless it is disabled, empty, or already present.
    fn register(table: &mut HashMap<&'static str, Regex>, enabled: bool, pattern: &'static str) {
        if !enabled || pattern.is_empty() {
            return;
        }
        table
            .entry(pattern)
            .or_insert_with(|| Regex::new(pattern).expect("invalid regex in data.rs"));
    }

    CACHE.get_or_init(|| {
        let mut table = HashMap::new();
        for (_, entry) in AUTOMATON {
            for rule in entry.rules {
                if let RuleKind::Context {
                    plus,
                    minus,
                    has_plus,
                    has_minus,
                } = rule.kind
                {
                    register(&mut table, has_plus, plus);
                    register(&mut table, has_minus, minus);
                }
            }
        }
        table
    })
}
/// Looks up the compiled regex for `pattern` in the shared cache.
///
/// Returns `None` when the pattern text was not registered in the cache.
fn get_regex(pattern: &str) -> Option<&'static Regex> {
    let compiled = regex_cache();
    compiled.get(pattern)
}
/// Finds the `AUTOMATON` entry whose key equals `letter`.
///
/// The table is scanned in order and the first entry whose key is exactly the
/// UTF-8 text of `letter` is returned; `None` if no key matches.
fn lookup_letter(letter: char) -> Option<&'static LetterEntry> {
    // Encode into a stack buffer so the key comparison is str-to-str.
    let mut utf8 = [0u8; 4];
    let wanted: &str = letter.encode_utf8(&mut utf8);
    for (key, entry) in AUTOMATON {
        if *key != wanted {
            continue;
        }
        return Some(entry);
    }
    None
}
/// Tests whether a context rule applies at a given position of `word`.
///
/// * `plus` / `has_plus` — look-ahead pattern: when enabled, it must match the
///   text starting at index `pos_mot` with the match beginning at offset 0.
/// * `minus` / `has_minus` — look-behind pattern, tested against the text
///   `word[..pos_mot - 1]`:
///   * exactly `"^"`: holds only when `pos_mot == 1` (nothing precedes);
///   * starting with `'^'`: the match must span that whole preceding text;
///   * otherwise: the match must span some non-empty trailing part of the
///     preceding text, reaching its end.
/// * `pos_mot` — position just after the current character (callers in this
///   file pass `index + 1`), so it must be at least 1 and at most `word.len()`.
///
/// A condition that is enabled but has no compiled regex in the cache is
/// treated as not satisfied. Returns `true` only when every enabled condition
/// holds.
///
/// # Panics
///
/// May panic on slice indexing if `pos_mot` is outside `1..=word.len()`.
fn check_context(
    plus: &str,
    minus: &str,
    has_plus: bool,
    has_minus: bool,
    word: &[char],
    pos_mot: usize,
) -> bool {
    if has_plus {
        let Some(re) = get_regex(plus) else {
            return false;
        };
        // Only materialise the following text when a look-ahead is actually requested.
        let suffix: String = word[pos_mot..].iter().collect();
        if !re.find(&suffix).is_some_and(|m| m.start() == 0) {
            // The look-ahead already failed; the look-behind cannot change the outcome.
            return false;
        }
    }

    if !has_minus {
        return true;
    }

    // A bare "^" only asks whether the current character is the first of the word.
    if minus == "^" {
        return pos_mot == 1;
    }

    let Some(re) = get_regex(minus) else {
        return false;
    };

    // True when the leftmost match covers `hay` from its first to its last byte.
    let spans_all =
        |hay: &str| re.find(hay).is_some_and(|m| m.start() == 0 && m.end() == hay.len());

    let prefix: String = word[..pos_mot - 1].iter().collect();

    if minus.starts_with('^') {
        return spans_all(&prefix);
    }

    // Try every trailing part of the prefix, shortest first, by slicing the
    // prefix at char boundaries instead of rebuilding a String per start offset.
    prefix
        .char_indices()
        .rev()
        .any(|(start, _)| spans_all(&prefix[start..]))
}
/// Evaluates a special (hand-coded) rule by delegating to the matching
/// predicate in the `rules` module.
///
/// `word` and `pos_mot` are forwarded unchanged to the selected predicate, and
/// its result is returned as-is.
fn check_special(sp: Special, word: &[char], pos_mot: usize) -> bool {
    // Select the predicate first, then invoke it once.
    let predicate: fn(&[char], usize) -> bool = match sp {
        Special::RegleIent => rules::regle_ient,
        Special::RegleMotsEnt => rules::regle_mots_ent,
        Special::RegleMent => rules::regle_ment,
        Special::RegleVerbeMer => rules::regle_verbe_mer,
        Special::RegleEr => rules::regle_er,
        Special::RegleNcAiFinal => rules::regle_nc_ai_final,
        Special::RegleAvoir => rules::regle_avoir,
        Special::RegleSFinal => rules::regle_s_final,
        Special::RegleTFinal => rules::regle_t_final,
        Special::RegleTien => rules::regle_tien,
    };
    predicate(word, pos_mot)
}
/// Decodes the character at index `pos` of `word` into a single [`Phoneme`].
///
/// Resolution order:
/// 1. the first rule of the letter's automaton entry that applies;
/// 2. the entry's `end_of_word` value, when `pos` is the last index and one is set;
/// 3. the entry's `default` value, when one is set;
/// 4. otherwise an empty code with a step of 1.
///
/// A character with no automaton entry also yields an empty code with a step
/// of 1.
///
/// # Panics
///
/// Panics if `pos` is not a valid index into `word`.
fn one_step(word: &[char], pos: usize) -> Phoneme {
    let Some(entry) = lookup_letter(word[pos]) else {
        return Phoneme {
            code: String::new(),
            step: 1,
        };
    };

    // Rule predicates take the position just after the current character.
    let pos_mot = pos + 1;

    for rule in entry.rules {
        let matched = match rule.kind {
            RuleKind::Special(sp) => check_special(sp, word, pos_mot),
            RuleKind::Context {
                plus,
                minus,
                has_plus,
                has_minus,
            } => check_context(plus, minus, has_plus, has_minus, word, pos_mot),
        };
        if matched {
            return Phoneme {
                code: rule.phoneme.to_string(),
                step: rule.step,
            };
        }
    }

    // No rule applied: fall back to end-of-word, then default, then a silent step.
    let is_last_char = pos_mot == word.len();
    let at_end = if is_last_char {
        entry.end_of_word.map(|(phon, step)| Phoneme {
            code: phon.to_string(),
            step,
        })
    } else {
        None
    };

    at_end
        .or_else(|| {
            entry.default.map(|(phon, step)| Phoneme {
                code: phon.to_string(),
                step,
            })
        })
        .unwrap_or_else(|| Phoneme {
            code: String::new(),
            step: 1,
        })
}
/// Converts `word` into its sequence of phonemes.
///
/// The word is walked character by character (Unicode scalar values). At each
/// position one phoneme is decoded and the cursor advances by that phoneme's
/// `step`. Characters with no automaton entry produce a phoneme with an empty
/// code. An empty `word` yields an empty vector.
///
/// For instance, `parse("chat")` yields phonemes whose codes are
/// `["s^", "a", "#"]`.
pub fn parse(word: &str) -> Vec<Phoneme> {
    let chars: Vec<char> = word.chars().collect();
    // Each phoneme consumes at least one character, so this is an upper bound.
    let mut phonemes: Vec<Phoneme> = Vec::with_capacity(chars.len());
    let mut pos = 0;
    while pos < chars.len() {
        let ph = one_step(&chars, pos);
        // Always make forward progress: a zero step in the rule data would
        // otherwise loop forever while growing the output without bound.
        pos += ph.step.max(1);
        phonemes.push(ph);
    }
    phonemes
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Projects a phoneme sequence onto its codes for compact assertions.
    fn codes_of(phonemes: &[Phoneme]) -> Vec<&str> {
        phonemes.iter().map(|p| p.code.as_str()).collect()
    }

    /// "chat" decodes to three phoneme codes.
    #[test]
    fn parse_chat() {
        let phonemes = parse("chat");
        assert_eq!(codes_of(&phonemes), ["s^", "a", "#"]);
    }

    /// "école" (with an accented first letter) decodes to five phoneme codes.
    #[test]
    fn parse_ecole() {
        let phonemes = parse("école");
        assert_eq!(codes_of(&phonemes), ["e", "k", "o", "l", "q_caduc"]);
    }
}