use crate::{
lang::{Edition, Lang},
models::kaikki::{Form, WordEntry},
};
pub fn preprocess_forms(edition: Edition, source: Lang, entry: &mut WordEntry) {
match (edition, source, entry.pos.as_str()) {
(Edition::De, Lang::De, "verb") => preprocess_verb_forms_de_de(entry),
(Edition::De, Lang::De, "adj") => preprocess_adj_forms_de_de(entry),
(Edition::De, Lang::De, "name") => preprocess_name_forms_de_de(entry),
(Edition::De, Lang::De, "noun" | "phrase") => preprocess_noun_forms_de_de(entry),
(Edition::En, Lang::Ga, _) => preprocess_forms_ga_en(entry),
(Edition::Es, Lang::Es, "verb") => preprocess_verb_forms_es_es(entry),
(Edition::Fr, Lang::Fr, "verb") => preprocess_verb_forms_fr_fr(entry),
(Edition::It, Lang::It, "verb") => preprocess_verb_forms_it_it(entry),
(Edition::Nl, Lang::Nl, "verb") => preprocess_verb_forms_nl_nl(entry),
(Edition::Pt, Lang::Pt, "verb") => preprocess_verb_forms_pt_pt(entry),
_ => (),
}
}
/// Removes a leading prefix from the text of every form in `entry`.
///
/// For each form, `prefixes` is scanned in slice order and only the first
/// prefix that matches is removed; forms matching none are left unchanged.
///
/// In debug builds this asserts that every prefix ends with a space.
fn strip_prefixes(entry: &mut WordEntry, prefixes: &[&str]) {
    debug_assert!(prefixes.iter().all(|prefix| prefix.ends_with(' ')));
    for form in entry.forms.iter_mut() {
        // `find_map` stops at the first prefix that matches, so at most one
        // prefix is removed per form.
        let remainder = prefixes
            .iter()
            .find_map(|&prefix| form.form.strip_prefix(prefix));
        if let Some(remainder) = remainder {
            form.form = remainder.to_owned();
        }
    }
}
/// Returns `true` when every tag listed in `tags` is present in `form.tags`.
///
/// An empty `tags` slice yields `true`.
fn contains_all(form: &Form, tags: &[&str]) -> bool {
    for wanted in tags {
        let present = form.tags.iter().any(|have| have == wanted);
        if !present {
            return false;
        }
    }
    true
}
/// Cleans up verb forms for the `De`/`De` edition–language pair.
///
/// Steps, in order:
/// 1. Strip one leading pronoun (`ich `, `du `, `er/sie/es `, `wir `, `ihr `,
///    `sie `) from each form.
/// 2. Drop forms carrying any tag in `COMPOUND_TAGS`, forms whose text ends
///    with `…` or `!`, and forms whose text contains a comma.
/// 3. Strip a leading `zu ` from the remaining forms and, for forms tagged
///    `extended`, a trailing ` zu haben`.
fn preprocess_verb_forms_de_de(entry: &mut WordEntry) {
    /// Tags whose presence causes a form to be discarded.
    const COMPOUND_TAGS: &[&str] = &[
        "perfect",
        "pluperfect",
        "future-i",
        "future-ii",
        "processual-passive",
        "statal-passive",
    ];

    strip_prefixes(
        entry,
        &["ich ", "du ", "er/sie/es ", "wir ", "ihr ", "sie "],
    );

    entry.forms.retain(|form| {
        let has_compound_tag = form
            .tags
            .iter()
            .any(|tag| COMPOUND_TAGS.contains(&tag.as_str()));
        if has_compound_tag {
            return false;
        }
        let text = form.form.as_str();
        !(text.ends_with(['…', '!']) || text.contains(','))
    });

    for form in entry.forms.iter_mut() {
        if let Some(rest) = form.form.strip_prefix("zu ") {
            form.form = rest.to_owned();
        }

        // The suffix is only removed from forms tagged `extended`; it is
        // checked against the text after any `zu ` prefix was removed.
        let is_extended = form.tags.iter().any(|tag| tag == "extended");
        if is_extended {
            if let Some(rest) = form.form.strip_suffix(" zu haben") {
                form.form = rest.to_owned();
            }
        }
    }
}
/// Cleans up adjective forms for the `De`/`De` edition–language pair.
///
/// Runs two stripping passes over the forms: first one of `er ist `,
/// `es ist `, `sie ist `, `sie sind `, then `am `. The passes are separate
/// calls, so a form can lose a prefix in each of them.
fn preprocess_adj_forms_de_de(entry: &mut WordEntry) {
    let passes: [&[&str]; 2] = [
        &["er ist ", "es ist ", "sie ist ", "sie sind "],
        &["am "],
    ];
    for prefixes in passes {
        strip_prefixes(entry, prefixes);
    }
}
/// Cleans up name forms for the `De`/`De` edition–language pair.
///
/// Strips one leading article (plain or parenthesised, as listed below) from
/// each form, then drops every form whose text ends with `’`.
fn preprocess_name_forms_de_de(entry: &mut WordEntry) {
    strip_prefixes(
        entry,
        &[
            "des ", "(das) ", "dem ", "(dem) ", "der ", "(der) ", "die ", "(die) ", "den ",
        ],
    );

    // Keep a form only if its final character is not `’`.
    entry
        .forms
        .retain(|form| form.form.chars().next_back() != Some('’'));
}
/// Cleans up noun (and phrase) forms for the `De`/`De` edition–language pair
/// by stripping one leading word from `LEADING_WORDS` off each form.
fn preprocess_noun_forms_de_de(entry: &mut WordEntry) {
    // Manual layout kept on purpose: related words share a row.
    #[rustfmt::skip]
    const LEADING_WORDS: &[&str] = &[
        "der ", "das ", "die ",
        "ein ", "eine ", "keine ",
        "den ", "einen ",
        "dem ", "einem ", "keinen ",
        "des ", "eines ", "einer ", "keiner ",
    ];

    strip_prefixes(entry, LEADING_WORDS);
}
/// Cleans up forms for the `En` edition / `Ga` language pair by stripping one
/// of the leading words `a `, `an `, `na `, `leis an `, `don `, `leis na `
/// from each form.
fn preprocess_forms_ga_en(entry: &mut WordEntry) {
    strip_prefixes(
        entry,
        &["a ", "an ", "na ", "leis an ", "don ", "leis na "],
    );
}
/// Cleans up verb forms for the `Es`/`Es` edition–language pair.
///
/// A form is dropped when any of the following holds:
/// - it is tagged `pluperfect` or `compound`;
/// - it is tagged both `infinitive` and `impersonal`;
/// - it is tagged both `perfect` and `subjunctive`.
fn preprocess_verb_forms_es_es(entry: &mut WordEntry) {
    entry.forms.retain(|form| {
        let has_compound_tag = form.tags.iter().any(|tag| {
            let tag = tag.as_str();
            tag == "pluperfect" || tag == "compound"
        });
        let discard = has_compound_tag
            || contains_all(form, &["infinitive", "impersonal"])
            || contains_all(form, &["perfect", "subjunctive"]);
        !discard
    });
}
/// Cleans up verb forms for the `Fr`/`Fr` edition–language pair.
///
/// First strips one leading pronoun (`je `, `j' `, `tu `, `il/elle/on `,
/// `nous `, `vous `, `ils/elles `) from each form. Then drops a form when any
/// of the following holds:
/// - it is tagged `perfect`, `pluperfect` or `anterior`;
/// - it is tagged both `past` and `conditional`;
/// - it is tagged both `past` and `imperative`.
fn preprocess_verb_forms_fr_fr(entry: &mut WordEntry) {
    /// Tags whose presence alone causes a form to be discarded.
    const COMPOUND_TAGS: &[&str] = &["perfect", "pluperfect", "anterior"];

    strip_prefixes(
        entry,
        &[
            "je ",
            "j' ",
            "tu ",
            "il/elle/on ",
            "nous ",
            "vous ",
            "ils/elles ",
        ],
    );

    entry.forms.retain(|form| {
        let has_compound_tag = form
            .tags
            .iter()
            .any(|tag| COMPOUND_TAGS.contains(&tag.as_str()));
        let discard = has_compound_tag
            || contains_all(form, &["past", "conditional"])
            || contains_all(form, &["past", "imperative"]);
        !discard
    });
}
/// Cleans up verb forms for the `It`/`It` edition–language pair.
///
/// First strips one leading word from `LEADING_WORDS` off each form. Then
/// drops forms tagged `perfect`, `pluperfect` or `historic`, and forms whose
/// text ends with `)`.
fn preprocess_verb_forms_it_it(entry: &mut WordEntry) {
    /// Leading words removed from forms; the list also includes `non `.
    const LEADING_WORDS: &[&str] = &[
        "avrei ",
        "avresti ",
        "avrebbe ",
        "avremmo ",
        "avreste ",
        "avrebbero ",
        "abbia ",
        "abbiamo ",
        "abbiate ",
        "abbiano ",
        "non ",
    ];

    strip_prefixes(entry, LEADING_WORDS);

    entry.forms.retain(|form| {
        if form.form.ends_with(')') {
            return false;
        }
        !form
            .tags
            .iter()
            .any(|tag| matches!(tag.as_str(), "perfect" | "pluperfect" | "historic"))
    });
}
/// Cleans up verb forms for the `Nl`/`Nl` edition–language pair.
///
/// Drops forms whose text contains a newline or an opening parenthesis, and
/// forms tagged `perfect`.
fn preprocess_verb_forms_nl_nl(entry: &mut WordEntry) {
    entry.forms.retain(|form| {
        if form.form.contains(['\n', '(']) {
            return false;
        }
        // Keep the form only when none of its tags is `perfect`.
        form.tags.iter().all(|tag| tag.as_str() != "perfect")
    });
}
/// Cleans up verb forms for the `Pt`/`Pt` edition–language pair by stripping
/// a leading `não ` from each form.
fn preprocess_verb_forms_pt_pt(entry: &mut WordEntry) {
    const LEADING_WORDS: &[&str] = &["não "];
    strip_prefixes(entry, LEADING_WORDS);
}