pub mod corpus;
pub mod english;
pub mod eval;
pub mod export;
pub mod lexmap;
pub mod memory;
pub mod reverse;
use std::collections::BTreeMap;
use crate::conlang::syntax::{self, Clause, NounPhrase, Word};
use crate::conlang::types::morphology::Morphology;
use crate::conlang::Phonology;
use crate::language_entry::DictionaryEntry;
use english::EnglishNp;
use lexmap::{GlossIndex, Mapping, PosHint};
/// Identifies which translation strategy produced a [`Translation`].
///
/// Only one tier is defined at present.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tier {
    /// Rule-based translation; displayed as `"Tier 1 RBMT"`.
    Rbmt,
}

impl Tier {
    /// Returns the fixed display label for this tier.
    pub fn label(self) -> &'static str {
        match self {
            Self::Rbmt => "Tier 1 RBMT",
        }
    }
}
/// How a single source word was handled during translation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Decision {
    /// The word was found in the lexicon.
    LexiconLookup {
        /// The lexicon word that was selected.
        word: String,
        /// The part-of-speech string reported alongside the match.
        pos: String,
    },
    /// No lexicon mapping was found for the word.
    Untranslatable,
}
/// A candidate rendering offered alongside the primary translation target.
#[derive(Debug, Clone, PartialEq)]
pub struct Alternative {
    /// The candidate target-language text.
    pub text: String,
    /// Short tag naming where the candidate came from
    /// (this module emits `"rbmt"` and `"translation-memory"`).
    pub source: &'static str,
    /// Confidence score attached to the candidate.
    // NOTE(review): presumably in 0.0..=1.0, since scores are shown as percentages — confirm.
    pub confidence: f32,
    /// Human-readable explanation of why the candidate is offered.
    pub rationale: String,
}
/// One step of the translation trace: a single source word and what became of it.
#[derive(Debug, Clone, PartialEq)]
pub struct TraceEntry {
    /// The source-language lemma that was looked up.
    pub source: String,
    /// Grammatical role the word played
    /// (this module emits `"subject"`, `"object"`, `"verb"` and `"adjective"`).
    pub role: &'static str,
    /// The text chosen for the target side: the lexicon word, or a `«lemma»`
    /// placeholder when nothing was found.
    pub target: String,
    /// How the word was handled.
    pub decision: Decision,
    /// Confidence assigned to this individual lookup.
    pub confidence: f32,
}
/// The full result of translating one piece of text.
#[derive(Debug, Clone)]
pub struct Translation {
    /// The original input text.
    pub source: String,
    /// The chosen target-language rendering.
    pub target: String,
    /// Word pairs as returned by the syntax assembler.
    // NOTE(review): the meaning of each tuple side is not visible here — confirm
    // against `syntax::assemble`.
    pub words: Vec<(String, String)>,
    /// Literal rendering as returned by the syntax assembler.
    pub literal: String,
    /// Overall confidence for `target`.
    pub confidence: f32,
    /// Per-word record of the lookups performed.
    pub trace: Vec<TraceEntry>,
    /// Source lemmas for which no lexicon mapping was found.
    pub unresolved: Vec<String>,
    /// Other candidate renderings, if any.
    pub alternatives: Vec<Alternative>,
    /// Which strategy produced the translation.
    pub tier: Tier,
}
/// Looks up `lemma` in the gloss index under the given part-of-speech hint.
///
/// Returns `(target, decision, confidence)`:
/// - on a hit: the mapped word, a [`Decision::LexiconLookup`] describing it, and `0.9`;
/// - on a miss: the lemma wrapped in guillemets (`«lemma»`),
///   [`Decision::Untranslatable`], and `0.2`.
fn resolve(idx: &GlossIndex, lemma: &str, hint: PosHint) -> (String, Decision, f32) {
    match idx.map(lemma, hint) {
        Mapping::Missing => {
            // Keep the untranslated lemma visible in the output, clearly marked.
            let placeholder = format!("«{lemma}»");
            (placeholder, Decision::Untranslatable, 0.2)
        }
        Mapping::Found { word, pos } => {
            let target = word.clone();
            (target, Decision::LexiconLookup { word, pos }, 0.9)
        }
    }
}
/// Parses `text` into an `english::EnglishClause`, using the lexicon to find the verb.
///
/// The tokens produced by `english::prepare` are scanned for the first one that
/// `idx` knows as a verb, either after `english::delemmatize_verb` or verbatim.
/// Tokens before that position form the subject region and tokens after it the
/// object region. If no token is a known verb, the text is handed to
/// `english::analyze` instead.
///
/// Within a region the last token is the noun head (passed through
/// `english::depluralize` to obtain its lemma and number), and the token directly
/// before the head is kept as the adjective only when the lexicon has an adjective
/// sense for it. An empty region yields no noun phrase.
fn structure(idx: &GlossIndex, text: &str) -> english::EnglishClause {
    use english::{EnglishClause, EnglishNp};

    let prep = english::prepare(text);
    let toks = &prep.tokens;

    let verb_idx = toks.iter().position(|t| {
        idx.has_sense(&english::delemmatize_verb(t), PosHint::Verb)
            || idx.has_sense(t, PosHint::Verb)
    });
    let Some(vi) = verb_idx else {
        // No lexicon-backed verb: fall back to the generic English analysis.
        return english::analyze(text);
    };

    let build_np = |region: &[String]| -> Option<EnglishNp> {
        let head = region.last()?;
        let (lemma, plural) = english::depluralize(head);
        let number = if plural { "pl".to_string() } else { "sg".to_string() };
        // The slice pattern only matches when a token precedes the head, so no
        // index arithmetic is needed to reach the adjective candidate.
        let adjective = match region {
            [.., cand, _head] => idx.has_sense(cand, PosHint::Adjective).then(|| cand.clone()),
            _ => None,
        };
        Some(EnglishNp { head: lemma, number, adjective })
    };

    // A pronoun subject is carried by `verb_person`, not by a noun phrase.
    let subject = if prep.pronoun_subject { None } else { build_np(&toks[..vi]) };
    let object = build_np(&toks[vi + 1..]);

    EnglishClause {
        subject,
        verb: Some(english::delemmatize_verb(&toks[vi])),
        verb_person: prep.person,
        object,
    }
}
/// Maps an English noun phrase onto a conlang [`NounPhrase`].
///
/// The head noun is resolved first, then the optional adjective. Every lookup
/// appends a [`TraceEntry`] to `trace` (the head under `role`, the adjective under
/// `"adjective"`), and any lemma without a lexicon mapping is also appended to
/// `unresolved`. The phrase's number is copied over unchanged.
fn map_np(
    idx: &GlossIndex,
    np: &EnglishNp,
    role: &'static str,
    trace: &mut Vec<TraceEntry>,
    unresolved: &mut Vec<String>,
) -> NounPhrase {
    // Resolve one lemma, record the outcome, and hand back the target root.
    let mut lookup = |lemma: &str, hint, entry_role: &'static str| {
        let (target, decision, confidence) = resolve(idx, lemma, hint);
        if matches!(decision, Decision::Untranslatable) {
            unresolved.push(lemma.to_owned());
        }
        trace.push(TraceEntry {
            source: lemma.to_owned(),
            role: entry_role,
            target: target.clone(),
            decision,
            confidence,
        });
        target
    };

    let head_root = lookup(&np.head, PosHint::Noun, role);
    let adjective = np.adjective.as_ref().map(|adj| Word {
        root: lookup(adj, PosHint::Adjective, "adjective"),
        gloss: adj.clone(),
    });

    NounPhrase {
        head: Word { root: head_root, gloss: np.head.clone() },
        number: np.number.clone(),
        adjective,
    }
}
pub fn translate(
phon: &Phonology,
morph: &Morphology,
typology: &BTreeMap<String, String>,
entries: &[DictionaryEntry],
text: &str,
) -> Translation {
let idx = GlossIndex::build(entries);
let parse = structure(&idx, text);
let mut trace: Vec<TraceEntry> = Vec::new();
let mut unresolved: Vec<String> = Vec::new();
let subject = parse.subject.as_ref().map(|np| map_np(&idx, np, "subject", &mut trace, &mut unresolved));
let object = parse.object.as_ref().map(|np| map_np(&idx, np, "object", &mut trace, &mut unresolved));
let verb = parse.verb.as_ref().map(|v| {
let (root, decision, conf) = resolve(&idx, v, PosHint::Verb);
if matches!(decision, Decision::Untranslatable) {
unresolved.push(v.clone());
}
trace.push(TraceEntry {
source: v.clone(),
role: "verb",
target: root.clone(),
decision,
confidence: conf,
});
Word { root, gloss: v.clone() }
});
let clause = Clause {
subject,
verb,
verb_person: parse.verb_person.clone(),
object,
noun_paradigm: "noun".into(),
verb_paradigm: "verb".into(),
..Default::default()
};
let rendered = syntax::assemble(phon, morph, typology, &clause);
let confidence = if trace.is_empty() {
0.0
} else {
trace.iter().map(|t| t.confidence).sum::<f32>() / trace.len() as f32
};
Translation {
source: text.to_string(),
target: rendered.surface,
words: rendered.words,
literal: rendered.literal,
confidence,
trace,
unresolved,
alternatives: Vec::new(),
tier: Tier::Rbmt,
}
}
/// Overlays translation-memory knowledge onto a translation.
///
/// The memory is queried with the translation's source text and the optional
/// `query_embedding`:
/// - **Exact hit**: if the remembered text differs from the current target, it
///   replaces the target, and the previous target is inserted at the front of
///   `alternatives` (source `"rbmt"`, with the confidence held before the
///   override). In either case the confidence is raised to at least `0.99`.
/// - **Fuzzy hit**: the remembered text is appended to `alternatives` with the
///   match score as its confidence; the target is left alone.
/// - **No hit**: the translation is returned unchanged.
///
/// Only `target`, `confidence` and `alternatives` are touched.
pub fn apply_memory(
    mut t: Translation,
    mem: &memory::TranslationMemory,
    query_embedding: Option<&[f32]>,
) -> Translation {
    let hit = mem.best(&t.source, query_embedding);
    match hit {
        memory::MemoryHit::None => {}
        memory::MemoryHit::Fuzzy { conlang, score, english } => {
            let rationale =
                format!("translation memory · {:.0}% match to \"{english}\"", score * 100.0);
            t.alternatives.push(Alternative {
                text: conlang,
                source: "translation-memory",
                confidence: score,
                rationale,
            });
        }
        memory::MemoryHit::Exact { conlang } => {
            if conlang != t.target {
                // Swap in the remembered text and keep the displaced rendering
                // as the first alternative.
                let displaced = std::mem::replace(&mut t.target, conlang);
                t.alternatives.insert(
                    0,
                    Alternative {
                        text: displaced,
                        source: "rbmt",
                        confidence: t.confidence,
                        rationale: "rule-based".to_string(),
                    },
                );
            }
            t.confidence = t.confidence.max(0.99);
        }
    }
    t
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::conlang::types::morphology::Morphology;

    /// The sentence most tests translate.
    const BIRD_SEES_STONE: &str = "the bird sees the stone";

    /// Builds a dictionary entry with only word, part of speech and translation set.
    fn entry(word: &str, pos: &str, translation: &str) -> DictionaryEntry {
        DictionaryEntry {
            word: word.into(),
            pos: pos.into(),
            translation: translation.into(),
            ..Default::default()
        }
    }

    /// A three-word lexicon: two nouns and one verb.
    fn lexicon() -> Vec<DictionaryEntry> {
        [("kira", "noun", "bird"), ("nami", "verb", "to see"), ("pata", "noun", "stone")]
            .iter()
            .map(|&(word, pos, translation)| entry(word, pos, translation))
            .collect()
    }

    /// A typology map whose only key is `word_order`.
    fn word_order(order: &str) -> BTreeMap<String, String> {
        BTreeMap::from([("word_order".to_string(), order.to_string())])
    }

    #[test]
    fn translates_a_simple_svo_sentence() {
        let phon = Phonology::default();
        let morph = Morphology::default();
        let typology = word_order("svo");
        let entries = lexicon();

        let out = translate(&phon, &morph, &typology, &entries, BIRD_SEES_STONE);

        assert!(out.unresolved.is_empty());
        assert_eq!(out.trace.len(), 3);
        assert_eq!(out.target, "kira nami pata");
        assert!(out.confidence > 0.8);
    }

    #[test]
    fn sov_order_is_respected() {
        let phon = Phonology::default();
        let morph = Morphology::default();
        let typology = word_order("sov");
        let entries = lexicon();

        let out = translate(&phon, &morph, &typology, &entries, BIRD_SEES_STONE);

        assert_eq!(out.target, "kira pata nami");
    }

    #[test]
    fn recovers_and_places_an_adjective() {
        let phon = Phonology::default();
        let morph = Morphology::default();
        let typology = word_order("svo");
        let mut entries = lexicon();
        entries.push(entry("mira", "adjective", "bright"));

        let out = translate(&phon, &morph, &typology, &entries, "the bright bird sees the stone");

        assert!(out.unresolved.is_empty(), "all words resolved: {:?}", out.unresolved);
        assert_eq!(out.target, "mira kira nami pata");
        assert_eq!(out.trace.len(), 4);
    }

    #[test]
    fn memory_overrides_with_rbmt_as_alternative() {
        let phon = Phonology::default();
        let morph = Morphology::default();
        let typology = word_order("svo");
        let entries = lexicon();

        let rule_based = translate(&phon, &morph, &typology, &entries, BIRD_SEES_STONE);
        assert_eq!(rule_based.target, "kira nami pata");

        let mut mem = memory::TranslationMemory::default();
        mem.add(BIRD_SEES_STONE, "kira pata-corrected nami");

        let remembered = apply_memory(rule_based, &mem, None);

        assert_eq!(remembered.target, "kira pata-corrected nami");
        assert_eq!(remembered.alternatives.len(), 1);
        assert_eq!(remembered.alternatives[0].text, "kira nami pata");
        assert!(remembered.confidence > 0.98);
    }

    #[test]
    fn unresolved_words_are_marked_and_listed() {
        let phon = Phonology::default();
        let morph = Morphology::default();
        // Deliberately empty: no `word_order` key is supplied for this case.
        let typology = BTreeMap::new();
        let entries = lexicon();

        let out = translate(&phon, &morph, &typology, &entries, "the dragon sees the stone");

        assert_eq!(out.unresolved, vec!["dragon".to_string()]);
        assert!(out.target.contains("«dragon»"));
        assert!(out.confidence < 0.8);
    }
}