use std::fmt::Write;
use super::stream::ParseError;
use crate::{
category::Stress,
romanize::token::{NumeralForm, OwnedConsonantForm, Token},
};
/// Normalizes a romanized word into the canonical spelling used by the rest of this module.
///
/// The steps, in order:
///
/// 1. The word is lowercased.
/// 2. Zero-width spaces are removed.
/// 3. Letters written as a base letter followed by a combining mark are folded into a single
///    precomposed letter (for example `a` + U+0301 becomes `á`). A combining grave on `i`/`u`
///    is simply dropped.
/// 4. Alternate letter variants are mapped to one canonical letter (`ṭ`, `ŧ`, `ț` → `ţ`;
///    `ḍ`, `đ` → `ḑ`; `ł`, `ḷ` → `ļ`; `ż` → `ẓ`; `ṇ` → `ň`; `ṛ`, `ŗ` → `ř`; `ì`, `ı` → `i`;
///    `ù` → `u`) and typographic apostrophes become `'`.
/// 5. An apostrophe (of any of the accepted kinds) in word-initial position is dropped.
///
/// Every non-ASCII code point in this function is spelled as a `\u{...}` escape on purpose:
/// the function exists to tell look-alike code-point sequences apart, and literal combining
/// marks or zero-width characters are invisible in most editors and are silently destroyed
/// by tools that normalize or strip Unicode.
pub fn normalize(word: &str) -> String {
    // (decomposed spelling, canonical replacement), applied in this order.
    const COMBINING_FORMS: &[(&str, &str)] = &[
        ("a\u{301}", "\u{e1}"),   // a + acute      -> á
        ("a\u{308}", "\u{e4}"),   // a + diaeresis  -> ä
        ("a\u{302}", "\u{e2}"),   // a + circumflex -> â
        ("e\u{301}", "\u{e9}"),   // e + acute      -> é
        ("e\u{308}", "\u{eb}"),   // e + diaeresis  -> ë
        ("e\u{302}", "\u{ea}"),   // e + circumflex -> ê
        ("i\u{300}", "i"),        // i + grave      -> i
        ("i\u{301}", "\u{ed}"),   // i + acute      -> í
        ("o\u{301}", "\u{f3}"),   // o + acute      -> ó
        ("o\u{308}", "\u{f6}"),   // o + diaeresis  -> ö
        ("o\u{302}", "\u{f4}"),   // o + circumflex -> ô
        ("u\u{300}", "u"),        // u + grave      -> u
        ("u\u{301}", "\u{fa}"),   // u + acute      -> ú
        ("u\u{308}", "\u{fc}"),   // u + diaeresis  -> ü
        ("u\u{302}", "\u{fb}"),   // u + circumflex -> û
        ("c\u{30c}", "\u{10d}"),  // c + caron      -> č
        ("c\u{327}", "\u{e7}"),   // c + cedilla    -> ç
        ("t\u{327}", "\u{163}"),  // t + cedilla    -> ţ
        ("t\u{323}", "\u{163}"),  // t + dot below  -> ţ
        ("d\u{323}", "\u{1e11}"), // d + dot below  -> ḑ
        ("d\u{327}", "\u{1e11}"), // d + cedilla    -> ḑ
        ("l\u{323}", "\u{13c}"),  // l + dot below  -> ļ
        ("l\u{327}", "\u{13c}"),  // l + cedilla    -> ļ
        ("s\u{30c}", "\u{161}"),  // s + caron      -> š
        ("z\u{30c}", "\u{17e}"),  // z + caron      -> ž
        ("z\u{307}", "\u{1e93}"), // z + dot above  -> ẓ
        ("z\u{323}", "\u{1e93}"), // z + dot below  -> ẓ
        ("n\u{30c}", "\u{148}"),  // n + caron      -> ň
        ("n\u{327}", "\u{148}"),  // n + cedilla    -> ň
        ("n\u{323}", "\u{148}"),  // n + dot below  -> ň
        ("r\u{30c}", "\u{159}"),  // r + caron      -> ř
        ("r\u{327}", "\u{159}"),  // r + cedilla    -> ř
        ("r\u{355}", "\u{159}"),  // r + right arrowhead below -> ř
        ("r\u{323}", "\u{159}"),  // r + dot below  -> ř
    ];

    // Maps a single precomposed variant letter (or typographic apostrophe) to its canonical
    // form; every other character is returned unchanged.
    fn fold_variant(letter: char) -> char {
        match letter {
            '\u{2019}' | '\u{2bc}' | '\u{2018}' => '\'', // ’ ʼ ‘
            '\u{ec}' | '\u{131}' => 'i',                 // ì ı
            '\u{f9}' => 'u',                             // ù
            '\u{1e6d}' | '\u{167}' | '\u{21b}' => '\u{163}', // ṭ ŧ ț -> ţ
            '\u{1e0d}' | '\u{111}' => '\u{1e11}',        // ḍ đ -> ḑ
            '\u{142}' | '\u{1e37}' => '\u{13c}',         // ł ḷ -> ļ
            '\u{17c}' => '\u{1e93}',                     // ż -> ẓ
            '\u{1e47}' => '\u{148}',                     // ṇ -> ň
            '\u{1e5b}' | '\u{157}' => '\u{159}',         // ṛ ŗ -> ř
            other => other,
        }
    }

    // NOTE(review): the character stripped here is invisible in the original source; it is
    // assumed to be U+200B ZERO WIDTH SPACE — confirm against the upstream file.
    let mut word = word.to_lowercase().replace('\u{200b}', "");

    // Replacements are applied one after another, in table order. `replace` always allocates,
    // so it is skipped when the pattern does not occur (the common case).
    for &(decomposed, replacement) in COMBINING_FORMS {
        if word.contains(decomposed) {
            word = word.replace(decomposed, replacement);
        }
    }

    let mut output = String::with_capacity(word.len());
    let mut chars = word.chars();

    match chars.next() {
        None => return output,
        // A word-initial apostrophe of any accepted kind is dropped entirely.
        Some('\u{2019}' | '\u{2bc}' | '\u{2018}' | '\'') => {}
        Some(first) => output.push(fold_variant(first)),
    }

    output.extend(chars.map(fold_variant));
    output
}
/// Determines which syllable of `word`, if any, carries an explicit stress mark.
///
/// The word is scanned from its last character to its first while vowels are counted.
/// A vowel that directly precedes an `i`/`í` or `u`/`ú` (and is one of the vowels accepted
/// before that letter) is treated as belonging to the same vowel form and is not counted
/// again. The position of the single accented vowel, counted from the end, selects the
/// result.
///
/// Returns:
/// - `Ok(Some(Stress::Ultimate | Penultimate | Antepenultimate))` when the accented vowel is
///   in the first, second or third counted vowel form from the end;
/// - `Ok(Some(Stress::Monosyllabic))` when no vowel is accented and exactly one vowel form
///   was counted;
/// - `Ok(None)` otherwise.
///
/// # Errors
///
/// - `ParseError::StressDoubled` when a second accented vowel is encountered.
/// - `ParseError::StressInvalid` when the accented vowel lies further back than the third
///   counted vowel form.
pub fn detect_stress(word: &str) -> Result<Option<Stress>, ParseError> {
    // The `i`/`u` (accented or not) visited on the previous step, i.e. the letter that
    // follows the current one in reading order.
    #[derive(Clone, Copy)]
    enum Glide {
        Absent,
        I,
        U,
    }

    // How the current letter contributes to the vowel count and to stress detection.
    enum Letter {
        Other,
        Plain,
        Accented,
        PlainBeforeGlide,
        AccentedBeforeGlide,
    }

    let mut syllables = 0;
    let mut glide = Glide::Absent;
    let mut marked = None;

    for letter in word.chars().rev() {
        let (class, next_glide) = match (letter, glide) {
            ('a' | 'e' | 'ë' | 'o' | 'u', Glide::I) | ('a' | 'e' | 'ë' | 'o' | 'i', Glide::U) => {
                (Letter::PlainBeforeGlide, Glide::Absent)
            }
            ('á' | 'é' | 'ê' | 'ó' | 'ú', Glide::I) | ('á' | 'é' | 'ê' | 'ó' | 'í', Glide::U) => {
                (Letter::AccentedBeforeGlide, Glide::Absent)
            }
            ('i', _) => (Letter::Plain, Glide::I),
            ('í', _) => (Letter::Accented, Glide::I),
            ('u', _) => (Letter::Plain, Glide::U),
            ('ú', _) => (Letter::Accented, Glide::U),
            ('a' | 'ä' | 'e' | 'ë' | 'o' | 'ö' | 'ü', _) => (Letter::Plain, Glide::Absent),
            ('á' | 'â' | 'é' | 'ê' | 'ó' | 'ô' | 'û', _) => (Letter::Accented, Glide::Absent),
            _ => (Letter::Other, Glide::Absent),
        };
        glide = next_glide;

        // Letters before a glide were already counted when the glide itself was visited,
        // so only free-standing vowels bump the counter.
        let accented = match class {
            Letter::Other | Letter::PlainBeforeGlide => false,
            Letter::Plain => {
                syllables += 1;
                false
            }
            Letter::Accented => {
                syllables += 1;
                true
            }
            Letter::AccentedBeforeGlide => true,
        };

        if !accented {
            continue;
        }

        if marked.is_some() {
            return Err(ParseError::StressDoubled);
        }

        marked = Some(match syllables {
            1 => Stress::Ultimate,
            2 => Stress::Penultimate,
            3 => Stress::Antepenultimate,
            _ => return Err(ParseError::StressInvalid),
        });
    }

    if syllables == 1 && marked.is_none() {
        marked = Some(Stress::Monosyllabic);
    }

    Ok(marked)
}
/// Returns `word` with every stress-marked vowel replaced by its unmarked counterpart.
///
/// Acute vowels lose their accent (`á`→`a`, `é`→`e`, `í`→`i`, `ó`→`o`, `ú`→`u`) and
/// circumflex vowels map to the corresponding diaeresis vowel (`â`→`ä`, `ê`→`ë`, `ô`→`ö`,
/// `û`→`ü`). All other characters are copied through unchanged.
pub fn unstress_vowels(word: &str) -> String {
    word.chars()
        .map(|letter| match letter {
            'á' => 'a',
            'â' => 'ä',
            'é' => 'e',
            'ê' => 'ë',
            'í' => 'i',
            'ó' => 'o',
            'ô' => 'ö',
            'ú' => 'u',
            'û' => 'ü',
            other => other,
        })
        .collect()
}
pub fn tokenize(word: &str) -> Result<Vec<Token>, ParseError> {
let (word, has_word_final_glottal_stop) = match word.strip_suffix('\'') {
Some(value) => (value, true),
None => (word, false),
};
let word = word.to_owned();
#[derive(Clone, Copy)]
enum CurrentToken {
None,
C,
V,
N,
}
let mut tokens = Vec::new();
let mut current_token: CurrentToken = CurrentToken::None;
let mut current = String::new();
macro_rules! push_current_token {
() => {
if !current.is_empty() {
match current_token {
CurrentToken::None => {
unreachable!("tokens were parsed without setting a corresponding type");
}
CurrentToken::C => {
if current.starts_with(['h', 'w', 'y']) {
tokens.push(Token::H(match current.parse() {
Ok(h_form) => h_form,
Err(_) => return Err(ParseError::SourceHFormInvalid),
}));
} else {
tokens.push(Token::C(OwnedConsonantForm(current)));
}
}
CurrentToken::V => tokens.push(match ¤t[..] {
"'" => Token::GlottalStop,
"ë" => Token::Schwa,
"üa" => Token::ÜA,
vowel_form => Token::V(match vowel_form.parse() {
Ok(vowel_form) => vowel_form,
Err(_) => return Err(ParseError::SourceVowelInvalid),
}),
}),
CurrentToken::N => match current.parse() {
Ok(value) => tokens.push(Token::N(NumeralForm {
integer_part: value,
})),
Err(_) => return Err(ParseError::SourceNumeralInvalid),
},
}
}
};
}
for char in word.chars() {
match char {
'b' | 'c' | 'ç' | 'č' | 'd' | 'ḑ' | 'f' | 'g' | 'h' | 'j' | 'k' | 'l' | 'ļ' | 'm'
| 'n' | 'ň' | 'p' | 'r' | 'ř' | 's' | 'š' | 't' | 'ţ' | 'v' | 'w' | 'x' | 'y' | 'z'
| 'ẓ' | 'ž' | '_' => {
if matches!(current_token, CurrentToken::C) {
current.push(char);
} else {
push_current_token!();
current_token = CurrentToken::C;
current = char.to_string();
}
}
'a' | 'ä' | 'e' | 'ë' | 'i' | 'o' | 'ö' | 'u' | 'ü' | '\'' => {
if matches!(current_token, CurrentToken::V) {
current.push(char);
} else {
push_current_token!();
current_token = CurrentToken::V;
current = char.to_string();
}
}
'0'..='9' | '.' => {
if matches!(current_token, CurrentToken::N) {
current.push(char);
} else {
push_current_token!();
current_token = CurrentToken::N;
current = char.to_string();
}
}
_ => return Err(ParseError::SourceCharInvalid),
}
}
push_current_token!();
if has_word_final_glottal_stop {
tokens.push(Token::GlottalStop);
}
Ok(tokens)
}
/// Renders a sequence of tokens back into a single string.
///
/// Tokens are written in order. Consonant and H forms contribute their text, numerals are
/// written through the `Display` of their `integer_part`, and the fixed tokens contribute
/// `üa`, `ë` and `'`. Vowel forms are rendered with `as_str_after`, which is given the text
/// produced so far and whether the token is the last one in `tokens`.
///
/// An empty slice yields an empty string.
pub fn tokens_to_string(tokens: &[Token]) -> String {
    let mut text = String::new();

    if tokens.is_empty() {
        return text;
    }
    let last_position = tokens.len() - 1;

    for (position, token) in tokens.iter().enumerate() {
        match token {
            Token::C(consonants) => text += consonants,
            Token::V(vowels) => text += vowels.as_str_after(&text, last_position == position),
            Token::H(h_form) => text += h_form.as_str(),
            Token::N(numeral) => write!(text, "{}", numeral.integer_part)
                .expect("a Display implementation errored unexpectedly"),
            Token::ÜA => text += "üa",
            Token::Schwa => text += "ë",
            Token::GlottalStop => text += "'",
        }
    }

    text
}
/// Writes the given stress onto an unstressed word, adding an accent only where one is needed.
///
/// Vowels are counted from the end of the word. An `i` or `u` that directly follows one of
/// `a`, `e`, `ë`, `o` or the other of `i`/`u` is counted together with that preceding vowel,
/// and the accent (if any) goes on the preceding vowel.
///
/// - `Stress::Monosyllabic`: returns the word unchanged if it has exactly one counted vowel,
///   `None` if an earlier vowel exists.
/// - `Stress::Ultimate`: accents the last counted vowel, unless it is the only vowel in the
///   word, in which case the word is returned unchanged.
/// - `Stress::Penultimate`: returns the word unchanged, provided it has at least two counted
///   vowels.
/// - `Stress::Antepenultimate`: accents the third counted vowel from the end.
///
/// Returns `None` when the word does not contain enough vowels for the requested stress.
/// Already-accented vowels are not recognized as vowels by this function.
pub fn add_stress(word: &str, stress: Stress) -> Option<String> {
    fn is_vowel(letter: char) -> bool {
        matches!(letter, 'a' | 'ä' | 'e' | 'ë' | 'i' | 'o' | 'ö' | 'u' | 'ü')
    }

    let vowels_required = match stress {
        Stress::Monosyllabic | Stress::Ultimate => 1,
        Stress::Penultimate => 2,
        Stress::Antepenultimate => 3,
    };

    let mut vowels_found = 0;
    // Characters that come after the vowel being searched for, stored back to front.
    let mut tail: Vec<char> = Vec::new();
    let mut chars = word.chars().rev().peekable();

    loop {
        // Running out of characters means the word has too few vowels.
        let current = chars.next()?;

        // The vowel that would receive the accent for this vowel form.
        let nucleus = match current {
            'a' | 'ä' | 'e' | 'ë' | 'o' | 'ö' | 'ü' => current,
            'i' | 'u' => {
                let head = chars.next_if(|&previous| {
                    previous != current && matches!(previous, 'a' | 'e' | 'ë' | 'i' | 'o' | 'u')
                });
                match head {
                    Some(head) => {
                        tail.push(current);
                        head
                    }
                    None => current,
                }
            }
            _ => {
                tail.push(current);
                continue;
            }
        };

        vowels_found += 1;
        if vowels_found != vowels_required {
            tail.push(nucleus);
            continue;
        }

        match stress {
            Stress::Monosyllabic => {
                return if chars.any(is_vowel) {
                    None
                } else {
                    Some(word.to_owned())
                };
            }
            Stress::Ultimate => {
                // Look ahead on a clone: `any` consumes everything up to the first match,
                // and the untouched iterator is still needed below to rebuild the start of
                // the word.
                if !chars.clone().any(is_vowel) {
                    return Some(word.to_owned());
                }
            }
            Stress::Penultimate => return Some(word.to_owned()),
            Stress::Antepenultimate => {}
        }

        // Everything not yet visited, restored to reading order.
        let mut output: String = chars.rev().collect();
        output.push(match nucleus {
            'a' => 'á',
            'ä' => 'â',
            'e' => 'é',
            'ë' => 'ê',
            'i' => 'í',
            'o' => 'ó',
            'ö' => 'ô',
            'u' => 'ú',
            'ü' => 'û',
            _ => unreachable!(),
        });
        output.extend(tail.into_iter().rev());
        return Some(output);
    }
}