use anyhow::{anyhow, Result};
use std::marker::PhantomData;
/// Unprocessed input text, exactly as handed in by the caller.
///
/// The newtype keeps raw input distinct, at the type level, from the other
/// text-carrying types in this file.
#[derive(Debug, Clone)]
pub struct RawText(
    /// The raw text itself.
    pub String,
);
/// A document in the intermediate UTL (symbol token) representation.
#[derive(Debug, Clone)]
pub struct UtlDoc {
    /// Symbol tokens in emission order. `RawToUtl` appends a `⧖` token after
    /// every non-empty sentence it encodes.
    pub tokens: Vec<String>,
    /// Analysis results. `RawToUtl` leaves this as `None`; `analyze_utl`
    /// replaces it with `Some(..)`.
    pub metadata: Option<UtlMetadata>,
}
/// Descriptive labels attached to a UTL document by `analyze_utl`.
///
/// All labels are plain strings; the values listed below are the ones
/// `analyze_utl` writes in this file.
#[derive(Debug, Clone)]
pub struct UtlMetadata {
    /// Genre label: `"memoir"` or `"unknown"`.
    pub genre: String,
    /// Dominant tense label: `"past"`, `"present"` or `"future"`.
    pub temporal: String,
    /// Emotion label: `"joy"`, `"sadness"` or `"neutral"`.
    pub emotion: String,
    /// A duration in milliseconds; `analyze_utl` always stores `250`.
    /// NOTE(review): what this delay controls is not visible in this file.
    pub delay_ms: u64,
}
/// Rendered human-readable text, tagged at the type level with its language `L`.
///
/// Because `_lang` is private, values can only be built inside this module.
/// Note that the derived `Debug` and `Clone` impls only exist for an `L`
/// that itself implements those traits.
#[derive(Debug, Clone)]
pub struct HumanText<L: Language> {
    /// Zero-sized tag carrying the language type; holds no data.
    _lang: PhantomData<L>,
    /// The rendered text.
    pub text: String,
}
/// A human language identified purely by its type.
///
/// Implementors are used as type-level tags (for example on `HumanText`);
/// the trait has no instance methods.
pub trait Language {
    /// Returns the display name of the language, e.g. `"English"`.
    fn name() -> &'static str;
}
// The markers derive the common value traits so that generic wrappers which
// `#[derive(Debug, Clone)]` over a language parameter (such as `HumanText<L>`)
// actually receive those impls: a derive on a generic type only applies when
// the type parameter implements the trait too.

/// Type-level marker for English.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Eng;

/// Type-level marker for Japanese.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Jpn;

/// Type-level marker for Spanish.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Spa;

/// Type-level marker for Chinese.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Zho;

/// Type-level marker for Arabic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Ara;

/// Type-level marker for Hindi.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Hin;
impl Language for Eng {
fn name() -> &'static str {
"English"
}
}
impl Language for Jpn {
fn name() -> &'static str {
"Japanese"
}
}
impl Language for Spa {
fn name() -> &'static str {
"Spanish"
}
}
impl Language for Zho {
fn name() -> &'static str {
"Chinese"
}
}
impl Language for Ara {
fn name() -> &'static str {
"Arabic"
}
}
impl Language for Hin {
fn name() -> &'static str {
"Hindi"
}
}
/// A fallible conversion step from one representation (`Src`) to another (`Dst`).
///
/// The input is taken by value; implementors receive `&self` only.
pub trait Translate<Src, Dst> {
    /// Converts `input` into the target representation.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementor reports for an input it cannot
    /// convert.
    fn translate(&self, input: Src) -> Result<Dst>;
}
/// Encoder that turns raw text into a [`UtlDoc`] symbol stream.
pub struct RawToUtl;

impl Translate<RawText, UtlDoc> for RawToUtl {
    /// Encodes `input` sentence by sentence.
    ///
    /// The text is split on `'.'`; each non-empty sentence is lower-cased and
    /// contributes, in this fixed order, at most one of each marker followed
    /// by a closing `⧖`:
    ///
    /// | token | trigger                                        |
    /// |-------|------------------------------------------------|
    /// | `🙋`  | whole word `i` or `me`                         |
    /// | `👤`  | whole word `you`                               |
    /// | `❤️`  | substring `love`                               |
    /// | `🧠`  | substring `think`                              |
    /// | `💭`  | substring `remember`                           |
    /// | `⏮`  | whole word `was`, `were` or `being`            |
    /// | `⏺`  | whole word `is`, `am` or `are`                 |
    /// | `⏭`  | whole word `will`                              |
    ///
    /// "Whole word" means a whitespace-separated word with leading/trailing
    /// ASCII punctuation removed, so `"you!"` counts as `you` while `"this"`,
    /// `"remember"` or `"willow"` do not count as `is`, `me` or `will`.
    ///
    /// The returned document has `metadata: None`. This implementation never
    /// returns `Err`.
    ///
    /// NOTE(review): the emotion symbols (`😊`, `😢`) understood by the
    /// renderers and by `analyze_utl` are never emitted here.
    fn translate(&self, input: RawText) -> Result<UtlDoc> {
        let mut tokens = Vec::new();
        for sentence in input.0.split('.') {
            let sentence = sentence.trim().to_lowercase();
            if sentence.is_empty() {
                continue;
            }

            // Tokenise once; every whole-word check below reuses this list.
            let words: Vec<&str> = sentence
                .split_whitespace()
                .map(|w| w.trim_matches(|c: char| c.is_ascii_punctuation()))
                .collect();
            let has_word =
                |candidates: &[&str]| words.iter().any(|w| candidates.contains(w));

            // Pronouns and tense markers are short function words that occur
            // inside many unrelated words, so they must match whole words only.
            if has_word(&["i", "me"]) {
                tokens.push("🙋".to_string());
            }
            if has_word(&["you"]) {
                tokens.push("👤".to_string());
            }
            // Content words stay substring matches so inflected forms
            // ("loved", "thinking", "remembered") are still recognised.
            if sentence.contains("love") {
                tokens.push("❤️".to_string());
            }
            if sentence.contains("think") {
                tokens.push("🧠".to_string());
            }
            if sentence.contains("remember") {
                tokens.push("💭".to_string());
            }
            if has_word(&["was", "were", "being"]) {
                tokens.push("⏮".to_string());
            }
            if has_word(&["is", "am", "are"]) {
                tokens.push("⏺".to_string());
            }
            if has_word(&["will"]) {
                tokens.push("⏭".to_string());
            }
            // Sentence terminator.
            tokens.push("⧖".to_string());
        }
        Ok(UtlDoc {
            tokens,
            metadata: None,
        })
    }
}
/// Renderer that turns a `UtlDoc` into `HumanText` for the language `L`.
///
/// The type is zero-sized: `L` is only a type-level tag, and the actual
/// vocabulary lives in the per-language `Translate` impls.
pub struct UtlToHuman<L: Language>(PhantomData<L>);
impl Default for UtlToHuman<Eng> {
fn default() -> Self {
Self::new()
}
}
impl UtlToHuman<Eng> {
pub fn new() -> Self {
Self(PhantomData)
}
}
impl Translate<UtlDoc, HumanText<Eng>> for UtlToHuman<Eng> {
    /// Renders each known token as its English word and joins the words with
    /// single spaces (the `⧖` terminator becomes `"."`, also space-separated).
    ///
    /// Tokens without an entry in the table are skipped. This implementation
    /// never returns `Err`.
    fn translate(&self, input: UtlDoc) -> Result<HumanText<Eng>> {
        // Symbol -> English word.
        const LEXICON: &[(&str, &str)] = &[
            ("🙋", "I"),
            ("👤", "you"),
            ("❤️", "love"),
            ("🧠", "think"),
            ("💭", "remember"),
            ("⏮", "was"),
            ("⏺", "is"),
            ("⏭", "will"),
            ("😊", "happy"),
            ("😢", "sad"),
            ("⧖", "."),
        ];

        let rendered: Vec<&str> = input
            .tokens
            .iter()
            .filter_map(|token| {
                LEXICON
                    .iter()
                    .find(|(symbol, _)| *symbol == token.as_str())
                    .map(|&(_, word)| word)
            })
            .collect();

        Ok(HumanText {
            _lang: PhantomData,
            text: rendered.join(" "),
        })
    }
}
impl Default for UtlToHuman<Jpn> {
fn default() -> Self {
Self::new()
}
}
impl UtlToHuman<Jpn> {
pub fn new() -> Self {
Self(PhantomData)
}
}
impl Translate<UtlDoc, HumanText<Jpn>> for UtlToHuman<Jpn> {
    /// Renders each known token as its Japanese word and concatenates the
    /// words with no separator (the `⧖` terminator becomes `"。"`).
    ///
    /// Tokens without an entry in the table are skipped. This implementation
    /// never returns `Err`.
    fn translate(&self, input: UtlDoc) -> Result<HumanText<Jpn>> {
        // Symbol -> Japanese word.
        const LEXICON: &[(&str, &str)] = &[
            ("🙋", "私"),
            ("👤", "あなた"),
            ("❤️", "愛"),
            ("🧠", "考える"),
            ("💭", "思い出す"),
            ("⏮", "でした"),
            ("⏺", "です"),
            ("⏭", "でしょう"),
            ("😊", "嬉しい"),
            ("😢", "悲しい"),
            ("⧖", "。"),
        ];

        // Collecting `&str` items straight into a `String` concatenates them,
        // which is exactly a join with an empty separator.
        let text: String = input
            .tokens
            .iter()
            .filter_map(|token| {
                LEXICON
                    .iter()
                    .find(|(symbol, _)| *symbol == token.as_str())
                    .map(|&(_, word)| word)
            })
            .collect();

        Ok(HumanText {
            _lang: PhantomData,
            text,
        })
    }
}
impl Default for UtlToHuman<Spa> {
fn default() -> Self {
Self::new()
}
}
impl UtlToHuman<Spa> {
pub fn new() -> Self {
Self(PhantomData)
}
}
impl Translate<UtlDoc, HumanText<Spa>> for UtlToHuman<Spa> {
    /// Renders each known token as its Spanish word and joins the words with
    /// single spaces (the `⧖` terminator becomes `"."`, also space-separated).
    ///
    /// Tokens without an entry in the table are skipped. This implementation
    /// never returns `Err`.
    fn translate(&self, input: UtlDoc) -> Result<HumanText<Spa>> {
        // Symbol -> Spanish word.
        const LEXICON: &[(&str, &str)] = &[
            ("🙋", "yo"),
            ("👤", "tú"),
            ("❤️", "amor"),
            ("🧠", "pensar"),
            ("💭", "recordar"),
            ("⏮", "era"),
            ("⏺", "es"),
            ("⏭", "será"),
            ("😊", "feliz"),
            ("😢", "triste"),
            ("⧖", "."),
        ];

        let rendered: Vec<&str> = input
            .tokens
            .iter()
            .filter_map(|token| {
                LEXICON
                    .iter()
                    .find(|(symbol, _)| *symbol == token.as_str())
                    .map(|&(_, word)| word)
            })
            .collect();

        Ok(HumanText {
            _lang: PhantomData,
            text: rendered.join(" "),
        })
    }
}
/// Renders an already-encoded UTL document as Spanish text.
///
/// Unlike `to_english` / `to_japanese`, this takes a `UtlDoc` rather than raw
/// text and returns the typed `HumanText<Spa>` rather than a bare `String`.
///
/// # Errors
///
/// Propagates any error from the Spanish renderer.
pub fn to_spanish(doc: UtlDoc) -> Result<HumanText<Spa>> {
    let renderer = UtlToHuman::<Spa>::new();
    renderer.translate(doc)
}
/// Derives [`UtlMetadata`] from the tokens of `doc` and stores it in
/// `doc.metadata`, replacing any previous value.
///
/// * `temporal` — `"past"` when `⏮` strictly outnumbers both `⏺` and `⏭`;
///   otherwise `"future"` when `⏭` strictly outnumbers `⏺`; otherwise
///   `"present"`.
/// * `emotion` — `"joy"` if a `😊` token is present, else `"sadness"` if a
///   `😢` token is present, else `"neutral"`.
/// * `genre` — `"memoir"` when the document is `"past"` and contains `💭`,
///   otherwise `"unknown"`.
/// * `delay_ms` — always `250`.
///
/// This function never returns `Err`.
pub fn analyze_utl(doc: &mut UtlDoc) -> Result<()> {
    let tokens = &doc.tokens;
    let occurrences = |symbol: &str| tokens.iter().filter(|tok| tok.as_str() == symbol).count();
    let present_in_doc = |symbol: &str| tokens.iter().any(|tok| tok == symbol);

    let (past, present, future) = (occurrences("⏮"), occurrences("⏺"), occurrences("⏭"));

    let temporal = if past > present && past > future {
        "past"
    } else if future > present {
        "future"
    } else {
        "present"
    };

    let emotion = if present_in_doc("😊") {
        "joy"
    } else if present_in_doc("😢") {
        "sadness"
    } else {
        "neutral"
    };

    let genre = if temporal == "past" && present_in_doc("💭") {
        "memoir"
    } else {
        "unknown"
    };

    doc.metadata = Some(UtlMetadata {
        genre: genre.to_string(),
        temporal: temporal.to_string(),
        emotion: emotion.to_string(),
        delay_ms: 250,
    });
    Ok(())
}
/// Hook for persisting a UTL document to MEM|8.
///
/// When the crate is built with the `mem8` feature this prints a one-line
/// message with the token count to stdout; without the feature it does
/// nothing. In both cases it returns `Ok(())`.
pub fn store_mem8(doc: &UtlDoc) -> Result<()> {
    // `cfg!` is resolved at compile time, so the branch is constant per build.
    if cfg!(feature = "mem8") {
        println!("Storing UTL with {} tokens to MEM|8", doc.tokens.len());
    }
    Ok(())
}
/// Runs the full pipeline for language `L`: raw text → UTL → analysis →
/// storage hook → rendered text.
///
/// Only languages for which `UtlToHuman<L>` implements `Translate` satisfy
/// the `where` bound.
///
/// # Errors
///
/// Returns the first error produced by any pipeline stage.
pub fn process_to_language<L: Language>(raw: &str) -> Result<HumanText<L>>
where
    UtlToHuman<L>: Translate<UtlDoc, HumanText<L>>,
{
    let source = RawText(String::from(raw));
    let mut doc = RawToUtl.translate(source)?;

    analyze_utl(&mut doc)?;
    store_mem8(&doc)?;

    let renderer: UtlToHuman<L> = UtlToHuman(PhantomData);
    renderer.translate(doc)
}
/// Runs `raw` through the UTL pipeline and returns the English rendering.
///
/// # Errors
///
/// Propagates any error from `process_to_language`.
pub fn to_english(raw: &str) -> Result<String> {
    process_to_language::<Eng>(raw).map(|rendered| rendered.text)
}
/// Runs `raw` through the UTL pipeline and returns the Japanese rendering.
///
/// # Errors
///
/// Propagates any error from `process_to_language`.
pub fn to_japanese(raw: &str) -> Result<String> {
    process_to_language::<Jpn>(raw).map(|rendered| rendered.text)
}
/// Always fails: direct translation between two human languages is rejected.
///
/// # Errors
///
/// Unconditionally returns an error naming the source language `A` and the
/// target language `B`.
pub fn forbid_human_to_human<A: Language, B: Language>() -> Result<()> {
    let source = A::name();
    let target = B::name();
    let refusal = anyhow!(
        "FORBIDDEN: Direct {} → {} translation! Must go through UTL!",
        source,
        target
    );
    Err(refusal)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `text` with `RawToUtl` and reports whether the past-tense
    /// marker `⏮` was emitted.
    fn emits_past_marker(text: &str) -> bool {
        RawToUtl
            .translate(RawText(text.into()))
            .unwrap()
            .tokens
            .iter()
            .any(|tok| tok == "⏮")
    }

    /// The public helpers run the whole pipeline and produce the expected words.
    #[test]
    fn test_enforced_pipeline() {
        let english = to_english("I love you").unwrap();
        assert!(english.contains("I"));
        assert!(english.contains("love"));

        let japanese = to_japanese("I love you").unwrap();
        assert!(japanese.contains("私"));
        assert!(japanese.contains("愛"));
    }

    /// A remembered, past-tense sentence is classified as a memoir.
    #[test]
    fn test_utl_analysis() {
        let mut doc = RawToUtl
            .translate(RawText("I remember being happy".into()))
            .unwrap();
        analyze_utl(&mut doc).unwrap();

        let meta = doc.metadata.unwrap();
        assert_eq!(meta.genre, "memoir");
        assert_eq!(meta.temporal, "past");
    }

    /// Past-tense markers match whole words only, never fragments of longer words.
    #[test]
    fn test_word_boundaries() {
        // Words that merely contain a marker must not trigger it.
        assert!(
            !emits_past_marker("I saw a wasp"),
            "wasp should not match 'was'"
        );
        assert!(
            !emits_past_marker("I wasn't there"),
            "wasn't should not match 'was'"
        );
        assert!(
            !emits_past_marker("They weren't happy"),
            "weren't should not match 'were'"
        );
        assert!(
            !emits_past_marker("Your wellbeing matters"),
            "wellbeing should not match 'being'"
        );

        // The markers themselves must still be recognised.
        assert!(emits_past_marker("I was happy"), "was should match");
        assert!(emits_past_marker("They were happy"), "were should match");
        assert!(emits_past_marker("I am being careful"), "being should match");
    }
}