use std::collections::BTreeMap;
use std::collections::HashMap;
use crate::conlang::morphology::paradigm;
use crate::conlang::types::morphology::Morphology;
use crate::conlang::Phonology;
use crate::language_entry::DictionaryEntry;
/// Borrowed bundle of one language's data, as consumed by [`cross`].
///
/// `cross` forwards the four fields, in this order, to `reverse` (for the source language)
/// and to `super::translate` (for the target language).
pub struct LangCtx<'a> {
/// Phonology of the language.
pub phon: &'a Phonology,
/// Morphology of the language.
pub morph: &'a Morphology,
/// Typology settings; `reverse` reads the `"word_order"` key from this map.
pub typology: &'a BTreeMap<String, String>,
/// Dictionary entries of the language.
pub entries: &'a [DictionaryEntry],
}
/// What a surface form resolves to inside a [`ReverseIndex`].
#[derive(Debug, Clone)]
struct RevForm {
/// Translation of the dictionary entry the surface form belongs to.
gloss: String,
/// `true` only for generated paradigm rows whose `number` feature starts with `"pl"`.
plural: bool,
}
/// Lookup table from conlang surface forms to their English gloss.
pub struct ReverseIndex {
/// Keyed by the lowercased surface form.
forms: HashMap<String, RevForm>,
}
/// Maps a part-of-speech label onto the broad paradigm name used for inflection.
///
/// The label is lowercased first. A label beginning with `n` selects `"noun"`, one beginning
/// with `v` selects `"verb"`, and anything else (including the empty string) yields `None`.
fn broad_paradigm(pos: &str) -> Option<&'static str> {
    match pos.to_lowercase().chars().next() {
        Some('n') => Some("noun"),
        Some('v') => Some("verb"),
        _ => None,
    }
}
impl ReverseIndex {
    /// Builds the surface-form index for a lexicon.
    ///
    /// Entries whose word or translation is blank (after trimming) are skipped. For every
    /// remaining entry, three sources of surface forms are registered, in this order:
    ///
    /// 1. rows generated from the entry's broad paradigm (see [`broad_paradigm`]), when the
    ///    morphology has a paradigm of that name — first writer wins;
    /// 2. the values stored in the entry's `inflection` map, as singular — first writer wins;
    /// 3. the bare lemma, as singular — this one always overwrites an existing binding.
    ///
    /// All keys are lowercased.
    pub fn build(phon: &Phonology, morph: &Morphology, entries: &[DictionaryEntry]) -> Self {
        let mut forms: HashMap<String, RevForm> = HashMap::new();

        let usable = entries
            .iter()
            .filter(|e| !e.word.trim().is_empty() && !e.translation.trim().is_empty());

        for entry in usable {
            let template = broad_paradigm(&entry.pos).and_then(|name| morph.paradigm(name));
            if let Some(template) = template {
                let rows =
                    paradigm::generate(phon, morph, template, &entry.word, &entry.translation);
                for row in rows {
                    // A row counts as plural only when it carries a `number` feature
                    // whose value begins with "pl".
                    let plural =
                        row.features.get("number").map_or(false, |n| n.starts_with("pl"));
                    forms.entry(row.form.to_lowercase()).or_insert_with(|| RevForm {
                        gloss: entry.translation.clone(),
                        plural,
                    });
                }
            }

            for inflected in entry.inflection.values() {
                forms.entry(inflected.to_lowercase()).or_insert_with(|| RevForm {
                    gloss: entry.translation.clone(),
                    plural: false,
                });
            }

            // The citation form is inserted unconditionally, so it takes over any
            // earlier binding of the same surface string.
            let lemma = RevForm { gloss: entry.translation.clone(), plural: false };
            forms.insert(entry.word.to_lowercase(), lemma);
        }

        ReverseIndex { forms }
    }

    /// Case-insensitive lookup of a surface form.
    fn lookup(&self, surface: &str) -> Option<&RevForm> {
        let key = surface.to_lowercase();
        self.forms.get(&key)
    }
}
/// Forms a regular English plural from a singular noun.
///
/// * consonant + `y` becomes `-ies` (`city` -> `cities`; `day` -> `days`);
/// * words ending in `s`, `x`, `z`, `ch` or `sh` take `-es`;
/// * everything else takes `-s`.
///
/// Irregular nouns are not special-cased.
fn pluralize_en(noun: &str) -> String {
    const VOWELS: [char; 5] = ['a', 'e', 'i', 'o', 'u'];
    const ES_ENDINGS: [&str; 5] = ["s", "x", "z", "ch", "sh"];

    match noun.strip_suffix('y') {
        Some(stem) if !stem.ends_with(VOWELS) => format!("{stem}ies"),
        _ if ES_ENDINGS.iter().any(|&ending| noun.ends_with(ending)) => format!("{noun}es"),
        _ => format!("{noun}s"),
    }
}
/// Puts a bare English verb (or verb phrase) into the third-person singular present.
///
/// Only the head word — the first whitespace-delimited word — is inflected, so a phrasal
/// gloss such as `look at` becomes `looks at` rather than `look ats`. Leading whitespace
/// and everything after the head word are passed through unchanged.
///
/// Rules applied to the head word:
///
/// * consonant + `y` becomes `-ies` (`carry` -> `carries`; `play` -> `plays`);
/// * endings `s`, `x`, `z`, `ch`, `sh` take `-es` (`watch` -> `watches`);
/// * consonant + `o` takes `-es` (`go` -> `goes`, `do` -> `does`; `moo` -> `moos`);
/// * everything else takes `-s`.
///
/// Irregular verbs (`be`, `have`) are not special-cased.
fn verb_3sg(verb: &str) -> String {
    const VOWELS: [char; 5] = ['a', 'e', 'i', 'o', 'u'];

    // Split into: leading whitespace, head word, remainder of the phrase.
    let body = verb.trim_start();
    let (lead, _) = verb.split_at(verb.len() - body.len());
    let head_len = body.find(char::is_whitespace).unwrap_or(body.len());
    let (head, tail) = body.split_at(head_len);

    let consonant_y_stem = head.strip_suffix('y').filter(|stem| !stem.ends_with(VOWELS));
    // A lone "o" is not treated as consonant + o.
    let consonant_o = head
        .strip_suffix('o')
        .map_or(false, |stem| !stem.is_empty() && !stem.ends_with(VOWELS));
    let sibilant = ["s", "x", "z", "ch", "sh"].iter().any(|&ending| head.ends_with(ending));

    let inflected = match consonant_y_stem {
        Some(stem) => format!("{stem}ies"),
        None if sibilant || consonant_o => format!("{head}es"),
        None => format!("{head}s"),
    };
    format!("{lead}{inflected}{tail}")
}
/// Turns a word-order setting into the sequence of clause roles it prescribes.
///
/// The setting is lowercased and every character other than `s`, `o` and `v` is ignored,
/// so `"SOV"` and `"s-o-v"` both give `['s', 'o', 'v']`. If what remains is not a
/// permutation of subject, verb and object — wrong length, or a role repeated or missing,
/// as in `"ssv"` — the order falls back to `['s', 'v', 'o']`.
///
/// For an intransitive clause (`transitive == false`) the object slot is removed.
fn role_sequence(word_order: &str, transitive: bool) -> Vec<char> {
    let mut roles: Vec<char> =
        word_order.to_lowercase().chars().filter(|c| matches!(c, 's' | 'o' | 'v')).collect();

    // Length alone is not enough: "ssv" has three characters but no object slot.
    let is_permutation = roles.len() == 3 && ['s', 'v', 'o'].iter().all(|r| roles.contains(r));
    if !is_permutation {
        roles = vec!['s', 'v', 'o'];
    }

    if !transitive {
        roles.retain(|c| *c != 'o');
    }
    roles
}
/// Drops a leading infinitive marker (`"to "`) from a gloss, if there is one.
///
/// The match is exact and case-sensitive; a gloss without the prefix is returned as is.
fn finite(gloss: &str) -> &str {
    match gloss.strip_prefix("to ") {
        Some(bare) => bare,
        None => gloss,
    }
}
/// Result of [`reverse`]: a conlang sentence rendered back into English.
#[derive(Debug, Clone)]
pub struct ReverseTranslation {
/// The input sentence, exactly as it was passed in.
pub source: String,
/// The English clause that was assembled from the glosses.
pub english: String,
/// One `(token, gloss)` pair per input token; an unresolved token is paired with `«token»`.
pub words: Vec<(String, String)>,
/// Input tokens that had no entry in the reverse index.
pub unresolved: Vec<String>,
/// Mean per-token score (0.9 for a resolved token, 0.2 for an unresolved one);
/// 0.0 when the input has no tokens.
pub confidence: f32,
}
/// Translates a conlang sentence back into a simple English clause.
///
/// The sentence is split on whitespace and on `. , ! ? ; :`, and each token is looked up
/// in a [`ReverseIndex`] built from `entries`. A token without an index entry is kept as
/// a `«token»` placeholder and reported in `unresolved`.
///
/// Tokens are assigned to subject / verb / object by position, following the
/// `"word_order"` entry of `typology` (default `"svo"`). A sentence of fewer than three
/// tokens is treated as intransitive. Tokens beyond the available role slots still appear
/// in `words`, but are not rendered into `english`.
///
/// The English clause is always emitted as subject-verb-object with definite articles.
/// The verb agrees with the subject in number; an unresolved verb placeholder is emitted
/// verbatim instead of being inflected (no `«zuxa»s`).
///
/// `confidence` is the mean per-token score: 0.9 for a resolved token, 0.2 for an
/// unresolved one, and 0.0 overall when there are no tokens.
pub fn reverse(
    phon: &Phonology,
    morph: &Morphology,
    typology: &BTreeMap<String, String>,
    entries: &[DictionaryEntry],
    surface: &str,
) -> ReverseTranslation {
    /// One analysed input token.
    struct Gloss {
        /// English gloss, or the `«token»` placeholder when the lookup failed.
        text: String,
        /// Whether the matched surface form is a plural.
        plural: bool,
        /// Whether the token was found in the index at all.
        resolved: bool,
    }

    let idx = ReverseIndex::build(phon, morph, entries);

    let mut words: Vec<(String, String)> = Vec::new();
    let mut unresolved: Vec<String> = Vec::new();
    let mut confs: Vec<f32> = Vec::new();
    let mut glossed: Vec<Gloss> = Vec::new();

    let tokens = surface
        .split(|c: char| c.is_whitespace() || matches!(c, '.' | ',' | '!' | '?' | ';' | ':'))
        .filter(|t| !t.is_empty());
    for token in tokens {
        let gloss = match idx.lookup(token) {
            Some(form) => {
                confs.push(0.9);
                Gloss { text: form.gloss.clone(), plural: form.plural, resolved: true }
            }
            None => {
                unresolved.push(token.to_string());
                confs.push(0.2);
                Gloss { text: format!("«{token}»"), plural: false, resolved: false }
            }
        };
        words.push((token.to_string(), gloss.text.clone()));
        glossed.push(gloss);
    }

    let word_order = typology.get("word_order").map(String::as_str).unwrap_or("svo");
    let transitive = glossed.len() >= 3;
    let roles = role_sequence(word_order, transitive);

    // Positional role assignment: the i-th token fills the i-th role slot.
    let mut subject: Option<&Gloss> = None;
    let mut verb: Option<&Gloss> = None;
    let mut object: Option<&Gloss> = None;
    for (gloss, role) in glossed.iter().zip(roles.iter().copied()) {
        match role {
            's' => subject = Some(gloss),
            'v' => verb = Some(gloss),
            'o' => object = Some(gloss),
            _ => {}
        }
    }

    let noun_en = |g: &Gloss| -> String {
        let base = finite(&g.text);
        if g.plural { pluralize_en(base) } else { base.to_string() }
    };
    // A missing subject is treated as singular.
    let subject_singular = subject.map_or(true, |g| !g.plural);

    let mut english = String::new();
    if let Some(s) = subject {
        english.push_str(&format!("the {} ", noun_en(s)));
    }
    if let Some(v) = verb {
        let base = finite(&v.text);
        // Only a real gloss is inflected; a placeholder must stay recognisable.
        if v.resolved && subject_singular {
            english.push_str(&verb_3sg(base));
        } else {
            english.push_str(base);
        }
    }
    if let Some(o) = object {
        english.push_str(&format!(" the {}", noun_en(o)));
    }
    let english = english.trim().to_string();

    let confidence =
        if confs.is_empty() { 0.0 } else { confs.iter().sum::<f32>() / confs.len() as f32 };

    ReverseTranslation { source: surface.to_string(), english, words, unresolved, confidence }
}
/// Result of [`cross`]: a sentence carried from one conlang to another via English.
#[derive(Debug, Clone)]
pub struct CrossTranslation {
/// The input sentence in the source language, exactly as it was passed in.
pub source: String,
/// The intermediate English produced by the reverse step.
pub english: String,
/// The sentence in the target language, as produced by the forward step.
pub target: String,
/// The word pairs reported by the forward step.
pub words: Vec<(String, String)>,
/// Unresolved items of the reverse step followed by those of the forward step.
pub unresolved: Vec<String>,
/// Product of the reverse-step and forward-step confidences.
pub confidence: f32,
}
/// Translates `surface` from one conlang into another, using English as the pivot.
///
/// The sentence is first reversed into English with the `from` language, and that English
/// is then handed to `super::translate` with the `to` language. Unresolved items of both
/// steps are concatenated (reverse step first) and the two confidences are multiplied.
pub fn cross(from: &LangCtx, to: &LangCtx, surface: &str) -> CrossTranslation {
    let reversed = reverse(from.phon, from.morph, from.typology, from.entries, surface);
    let forward = super::translate(to.phon, to.morph, to.typology, to.entries, &reversed.english);

    let confidence = reversed.confidence * forward.confidence;

    let mut unresolved = reversed.unresolved;
    unresolved.extend(forward.unresolved);

    CrossTranslation {
        source: surface.to_string(),
        english: reversed.english,
        target: forward.target,
        words: forward.words,
        unresolved,
        confidence,
    }
}
// Unit tests for `reverse` and `cross`, run against a three-word toy lexicon.
#[cfg(test)]
mod tests {
use super::*;
// Builds a dictionary entry with only word, part of speech and translation set;
// every other field takes its default value.
fn entry(word: &str, pos: &str, translation: &str) -> DictionaryEntry {
DictionaryEntry {
word: word.into(),
pos: pos.into(),
translation: translation.into(),
..Default::default()
}
}
// Source-language toy lexicon: two nouns and one verb.
fn lexicon() -> Vec<DictionaryEntry> {
vec![
entry("kira", "noun", "bird"),
entry("nami", "verb", "to see"),
entry("pata", "noun", "stone"),
]
}
// Typology map that sets only `word_order` to "svo".
fn svo() -> BTreeMap<String, String> {
let mut t = BTreeMap::new();
t.insert("word_order".into(), "svo".into());
t
}
// A fully known SVO sentence is rendered with articles and a 3sg verb,
// has nothing unresolved, and scores above 0.8.
#[test]
fn reverses_an_svo_clause() {
let phon = Phonology::default();
let morph = Morphology::default();
let entries = lexicon();
let r = reverse(&phon, &morph, &svo(), &entries, "kira nami pata");
assert_eq!(r.english, "the bird sees the stone");
assert!(r.unresolved.is_empty());
assert!(r.confidence > 0.8);
}
// With an SOV typology the second token is read as the object and the third as the
// verb; the English output is still subject-verb-object.
#[test]
fn reverse_respects_word_order() {
let phon = Phonology::default();
let morph = Morphology::default();
let mut sov = BTreeMap::new();
sov.insert("word_order".to_string(), "sov".to_string());
let entries = lexicon();
let r = reverse(&phon, &morph, &sov, &entries, "kira pata nami");
assert_eq!(r.english, "the bird sees the stone");
}
// Plural noun forms produced by the noun paradigm (suffix "i") are recognised, and the
// English verb agrees with the subject's number.
// NOTE(review): despite the name, only number agreement is asserted here; nothing
// exercises tense.
#[test]
fn reverse_agrees_number_and_tense() {
let phon = Phonology::default();
let morph = Morphology::from_hjson(
r#"{
kind: "agglutinative"
morphemes: [ { id: "pl", gloss: "PL", form: "i", position: "suffix" } ]
paradigms: [ { name: "noun", cells: [
{ features: { number: "sg" }, morphemes: [] }
{ features: { number: "pl" }, morphemes: ["pl"] }
] } ]
}"#,
)
.unwrap()
.unwrap();
let entries = lexicon();
let r = reverse(&phon, &morph, &svo(), &entries, "kira nami pata");
assert_eq!(r.english, "the bird sees the stone");
let r2 = reverse(&phon, &morph, &svo(), &entries, "kirai nami patai");
assert_eq!(r2.english, "the birds see the stones");
}
// A token missing from the lexicon is listed in `unresolved` and shows up in the
// English output wrapped in guillemets.
#[test]
fn unknown_conlang_word_is_marked() {
let phon = Phonology::default();
let morph = Morphology::default();
let entries = lexicon();
let r = reverse(&phon, &morph, &svo(), &entries, "kira nami zuxa");
assert_eq!(r.unresolved, vec!["zuxa".to_string()]);
assert!(r.english.contains("«zuxa»"));
}
// Language A (SVO) -> English -> language B (SOV): the pivot sentence is checked as
// well as the target-language word order.
#[test]
fn cross_pivots_through_english() {
let phon = Phonology::default();
let morph = Morphology::default();
let a_entries = lexicon();
let a_typ = svo();
let mut b_typ = BTreeMap::new();
b_typ.insert("word_order".to_string(), "sov".to_string());
let b_entries = vec![
entry("turi", "noun", "bird"),
entry("vela", "verb", "to see"),
entry("moki", "noun", "stone"),
];
let from = LangCtx { phon: &phon, morph: &morph, typology: &a_typ, entries: &a_entries };
let to = LangCtx { phon: &phon, morph: &morph, typology: &b_typ, entries: &b_entries };
let c = cross(&from, &to, "kira nami pata");
assert_eq!(c.english, "the bird sees the stone");
assert_eq!(c.target, "turi moki vela");
}
}