use std::collections::HashMap;
use crate::{
core::UromanInner,
utils::{
dequote_string, has_value_in_double_colon_del_list, slot_value_in_double_colon_del_list,
},
};
#[allow(unused)]
#[derive(Debug, Clone)]
pub(super) struct RomRule {
pub s: String,
pub t: Option<String>,
pub prov: String,
pub lcodes: Vec<String>,
pub use_only_at_start_of_word: bool,
pub dont_use_at_start_of_word: bool,
pub use_only_at_end_of_word: bool,
pub dont_use_at_end_of_word: bool,
pub use_only_for_whole_word: bool,
pub n_restr: usize,
pub t_alts: Vec<String>,
pub num: Option<crate::core::Value>,
pub is_minus_sign: bool,
pub is_plus_sign: bool,
pub is_decimal_point: bool,
pub fraction_connector: bool,
pub percentage_marker: bool,
pub int_frac_connector: bool,
pub is_large_power: bool,
pub t_at_end_of_syllable: Option<String>,
}
impl RomRule {
pub fn new_simple(s: String, t: &str, provenance: &str) -> Self {
Self {
s,
t: Some(t.to_string()),
prov: provenance.to_string(),
lcodes: Vec::new(),
use_only_at_start_of_word: false,
dont_use_at_start_of_word: false,
use_only_at_end_of_word: false,
dont_use_at_end_of_word: false,
use_only_for_whole_word: false,
n_restr: 0,
t_alts: Vec::new(),
num: None,
is_minus_sign: false,
is_plus_sign: false,
is_decimal_point: false,
fraction_connector: false,
percentage_marker: false,
int_frac_connector: false,
is_large_power: false,
t_at_end_of_syllable: None,
}
}
pub fn is_unconditional(&self) -> bool {
self.lcodes.is_empty()
&& !self.use_only_at_start_of_word
&& !self.dont_use_at_start_of_word
&& !self.use_only_at_end_of_word
&& !self.dont_use_at_end_of_word
&& !self.use_only_for_whole_word
}
pub fn from_line(
line: &str,
provenance: &str,
file_format: &str,
uroman: &mut UromanInner,
) -> Option<Self> {
let (s, t) = if file_format == "u2r" {
let u_str = slot_value_in_double_colon_del_list(line, "u")?;
let cp = u32::from_str_radix(u_str, 16).ok()?;
let s = std::char::from_u32(cp)?.to_string();
let t = slot_value_in_double_colon_del_list(line, "r")
.map(|s_val| dequote_string(s_val).to_string());
(s, t)
} else {
let s = slot_value_in_double_colon_del_list(line, "s")
.map(|s_val| dequote_string(s_val).to_string())?;
let t = slot_value_in_double_colon_del_list(line, "t")
.map(|s_val| dequote_string(s_val).to_string());
(s, t)
};
let t = uroman.second_rom_filter(&s, t.as_deref());
let lcodes: Vec<String> = slot_value_in_double_colon_del_list(line, "lcode")
.map(|s| {
s.split([',', ';'])
.map(|part| part.trim().to_string())
.collect()
})
.unwrap_or_default();
let use_only_at_start_of_word =
has_value_in_double_colon_del_list(line, "use-only-at-start-of-word");
let dont_use_at_start_of_word =
has_value_in_double_colon_del_list(line, "dont-use-at-start-of-word");
let use_only_at_end_of_word =
has_value_in_double_colon_del_list(line, "use-only-at-end-of-word");
let dont_use_at_end_of_word =
has_value_in_double_colon_del_list(line, "dont-use-at-end-of-word");
let use_only_for_whole_word =
has_value_in_double_colon_del_list(line, "use-only-for-whole-word");
let t_alts: Vec<String> = slot_value_in_double_colon_del_list(line, "t-alt")
.map(|s| {
s.split([',', ';'])
.map(|part| dequote_string(part).to_string())
.collect()
})
.unwrap_or_default();
let num = slot_value_in_double_colon_del_list(line, "num")
.and_then(crate::utils::robust_str_to_num);
let is_minus_sign = has_value_in_double_colon_del_list(line, "is-minus-sign");
let is_plus_sign = has_value_in_double_colon_del_list(line, "is-plus-sign");
let is_decimal_point = has_value_in_double_colon_del_list(line, "is-decimal-point");
let fraction_connector = has_value_in_double_colon_del_list(line, "fraction-connector");
let percentage_marker = has_value_in_double_colon_del_list(line, "percentage-marker");
let int_frac_connector = has_value_in_double_colon_del_list(line, "int-frac-connector");
let is_large_power = has_value_in_double_colon_del_list(line, "is-large-power");
let t_at_end_of_syllable = slot_value_in_double_colon_del_list(line, "t-end-of-syllable")
.map(|s_val| dequote_string(s_val).to_string());
let n_restr = [
!lcodes.is_empty(),
use_only_at_start_of_word,
dont_use_at_start_of_word,
use_only_at_end_of_word,
dont_use_at_end_of_word,
use_only_for_whole_word,
]
.iter()
.filter(|&&is_restr| is_restr)
.count();
Some(RomRule {
s,
t,
prov: provenance.to_string(),
lcodes,
use_only_at_start_of_word,
dont_use_at_start_of_word,
use_only_at_end_of_word,
dont_use_at_end_of_word,
use_only_for_whole_word,
n_restr,
t_alts,
num,
is_minus_sign,
is_plus_sign,
is_decimal_point,
fraction_connector,
percentage_marker,
int_frac_connector,
is_large_power,
t_at_end_of_syllable,
})
}
}
pub type RomRules = HashMap<String, Vec<RomRule>>;