// marqant 1.0.0 - Docs.rs

//! Type-level enforced UTL pipeline
//!
//! Makes Human→Human translation IMPOSSIBLE at compile time!

use anyhow::{anyhow, Result};
use std::marker::PhantomData;

// ---------- Core types ----------

/// Unprocessed input text, exactly as supplied by the caller.
///
/// This is the input type accepted by [`RawToUtl`], which converts it into a [`UtlDoc`].
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct RawText(pub String);

/// A document in UTL form: the representation every translation passes through.
///
/// `UtlDoc::default()` is an empty document: no tokens and no metadata.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct UtlDoc {
    /// UTL token stream - the universal representation
    pub tokens: Vec<String>,
    /// Optional metadata from analysis
    pub metadata: Option<UtlMetadata>,
}

/// Summary of a [`UtlDoc`], as attached by [`analyze_utl`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UtlMetadata {
    /// Detected genre; `analyze_utl` writes `"memoir"` or `"unknown"`.
    pub genre: String,
    /// Dominant tense; `analyze_utl` writes `"past"`, `"present"` or `"future"`.
    pub temporal: String,
    /// Detected emotion; `analyze_utl` writes `"joy"`, `"sadness"` or `"neutral"`.
    pub emotion: String,
    /// UDC delay in milliseconds; `analyze_utl` writes `250`.
    pub delay_ms: u64,
}

/// Text in one specific human language, tagged at the type level with its [`Language`] marker.
///
/// The `_lang` field is private, so code outside this module cannot build a
/// `HumanText` with a struct literal.
pub struct HumanText<L: Language> {
    _lang: PhantomData<L>,
    pub text: String,
}

// `Clone` and `Debug` are implemented by hand rather than derived: `#[derive]`
// would add `L: Clone` / `L: Debug` bounds, which a zero-sized marker type need
// not satisfy, leaving `HumanText<L>` without those impls for such markers.
impl<L: Language> Clone for HumanText<L> {
    fn clone(&self) -> Self {
        Self {
            _lang: PhantomData,
            text: self.text.clone(),
        }
    }
}

impl<L: Language> std::fmt::Debug for HumanText<L> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("HumanText")
            .field("lang", &L::name())
            .field("text", &self.text)
            .finish()
    }
}

// Marker trait: only languages we explicitly allow
pub trait Language {
    /// Human-readable name of the language (for example `"English"`).
    fn name() -> &'static str;
}

/// Type-level marker for English.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Eng;
/// Type-level marker for Japanese.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Jpn;
/// Type-level marker for Spanish.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Spa;
/// Type-level marker for Chinese.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Zho;
/// Type-level marker for Arabic.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Ara;
/// Type-level marker for Hindi.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct Hin;

impl Language for Eng {
    fn name() -> &'static str {
        "English"
    }
}
impl Language for Jpn {
    fn name() -> &'static str {
        "Japanese"
    }
}
impl Language for Spa {
    fn name() -> &'static str {
        "Spanish"
    }
}
impl Language for Zho {
    fn name() -> &'static str {
        "Chinese"
    }
}
impl Language for Ara {
    fn name() -> &'static str {
        "Arabic"
    }
}
impl Language for Hin {
    fn name() -> &'static str {
        "Hindi"
    }
}

// ---------- Translation trait (directional) ----------

/// One-directional conversion from `Src` to `Dst`.
///
/// A translator implements this trait once for every `(Src, Dst)` pair it
/// supports; a pair without an impl cannot be translated by that translator.
pub trait Translate<Src, Dst> {
    /// Converts `input` into the target representation, or returns an error.
    fn translate(&self, input: Src) -> Result<Dst>;
}

// ---------- ONLY Allowed translators ----------

/// Raw text to UTL - the ONLY entry point
pub struct RawToUtl;

impl Translate<RawText, UtlDoc> for RawToUtl {
    /// Tokenizes `input` into UTL symbols, one group per `.`-separated sentence.
    ///
    /// Each non-empty sentence is trimmed and lowercased, then scanned in a fixed
    /// order. Symbols are pushed in that order, not in the order the words occur:
    ///
    /// 1. `🙋` (self) for the word `i` or `me`
    /// 2. `👤` (other) for the word `you`
    /// 3. `❤️`, `🧠`, `💭` when the sentence contains `love`, `think`, `remember`
    ///    (substring match, so inflected forms such as "loved" also count)
    /// 4. `⏮` (past) for the word `was`, `were` or `being`
    /// 5. `⏺` (present) for the word `is`, `am` or `are`
    /// 6. `⏭` (future) for the word `will`
    /// 7. `⧖`, the delay marker that closes every sentence
    ///
    /// Word matches compare whole whitespace-separated words after stripping
    /// leading and trailing ASCII punctuation. "this" therefore does not count as
    /// "is", and contractions such as "wasn't" do not count as "was".
    ///
    /// The returned document has `metadata: None`. This implementation never
    /// returns `Err`.
    fn translate(&self, input: RawText) -> Result<UtlDoc> {
        let mut tokens = Vec::new();

        // Real UTL tokenization with theoglyphic symbols
        for sentence in input.0.split('.') {
            let sentence = sentence.trim().to_lowercase();
            if sentence.is_empty() {
                continue;
            }

            // Split once per sentence; every whole-word check below reuses this list.
            let words: Vec<&str> = sentence
                .split_whitespace()
                .map(|w| w.trim_matches(|c: char| c.is_ascii_punctuation()))
                .collect();
            let has_word = |candidates: &[&str]| words.iter().any(|w| candidates.contains(w));

            // Convert to UTL symbols
            if has_word(&["i", "me"]) {
                tokens.push("🙋".to_string()); // Self
            }
            if has_word(&["you"]) {
                tokens.push("👤".to_string()); // Other
            }

            // Stems: substring matching is deliberate so inflected forms still count.
            if sentence.contains("love") {
                tokens.push("❤️".to_string());
            }
            if sentence.contains("think") {
                tokens.push("🧠".to_string());
            }
            if sentence.contains("remember") {
                tokens.push("💭".to_string());
            }

            if has_word(&["was", "were", "being"]) {
                tokens.push("⏮".to_string()); // Past
            }
            if has_word(&["is", "am", "are"]) {
                tokens.push("⏺".to_string()); // Present
            }
            if has_word(&["will"]) {
                tokens.push("⏭".to_string()); // Future
            }

            // Add UDC delay marker between thoughts
            tokens.push("⧖".to_string());
        }

        Ok(UtlDoc {
            tokens,
            metadata: None,
        })
    }
}

/// UTL to human language - the ONLY exit point
pub struct UtlToHuman<L: Language>(PhantomData<L>);

impl UtlToHuman<Eng> {
    /// Creates the UTL → English translator.
    pub fn new() -> Self {
        UtlToHuman(PhantomData)
    }
}

impl Default for UtlToHuman<Eng> {
    /// Equivalent to calling `new()`.
    fn default() -> Self {
        UtlToHuman::<Eng>::new()
    }
}

impl Translate<UtlDoc, HumanText<Eng>> for UtlToHuman<Eng> {
    /// Renders every known UTL symbol as an English word, joined by single spaces.
    ///
    /// Symbols without an English entry are skipped. This implementation never
    /// returns `Err`.
    fn translate(&self, input: UtlDoc) -> Result<HumanText<Eng>> {
        // English rendering of a single UTL symbol, if it has one.
        fn english_for(symbol: &str) -> Option<&'static str> {
            let word = match symbol {
                "🙋" => "I",
                "👤" => "you",
                "❤️" => "love",
                "🧠" => "think",
                "💭" => "remember",
                "⏮" => "was",
                "⏺" => "is",
                "⏭" => "will",
                "😊" => "happy",
                "😢" => "sad",
                "⧖" => ".",
                _ => return None,
            };
            Some(word)
        }

        let rendered: Vec<&str> = input
            .tokens
            .iter()
            .map(String::as_str)
            .filter_map(english_for)
            .collect();

        Ok(HumanText {
            _lang: PhantomData,
            text: rendered.join(" "),
        })
    }
}

impl Default for UtlToHuman<Jpn> {
    fn default() -> Self {
        Self::new()
    }
}

impl UtlToHuman<Jpn> {
    pub fn new() -> Self {
        Self(PhantomData)
    }
}

impl Translate<UtlDoc, HumanText<Jpn>> for UtlToHuman<Jpn> {
    /// Renders every known UTL symbol as Japanese text, concatenated without separators.
    ///
    /// Symbols without a Japanese entry are skipped. This implementation never
    /// returns `Err`.
    fn translate(&self, input: UtlDoc) -> Result<HumanText<Jpn>> {
        // Japanese rendering of a single UTL symbol, if it has one.
        fn japanese_for(symbol: &str) -> Option<&'static str> {
            let word = match symbol {
                "🙋" => "私",
                "👤" => "あなた",
                "❤️" => "愛",
                "🧠" => "考える",
                "💭" => "思い出す",
                "⏮" => "でした",
                "⏺" => "です",
                "⏭" => "でしょう",
                "😊" => "嬉しい",
                "😢" => "悲しい",
                "⧖" => "。",
                _ => return None,
            };
            Some(word)
        }

        // Collecting straight into a `String` concatenates the pieces with no separator.
        let text: String = input
            .tokens
            .iter()
            .map(String::as_str)
            .filter_map(japanese_for)
            .collect();

        Ok(HumanText {
            _lang: PhantomData,
            text,
        })
    }
}

impl Default for UtlToHuman<Spa> {
    fn default() -> Self {
        Self::new()
    }
}

impl UtlToHuman<Spa> {
    pub fn new() -> Self {
        Self(PhantomData)
    }
}

impl Translate<UtlDoc, HumanText<Spa>> for UtlToHuman<Spa> {
    /// Renders every known UTL symbol as a Spanish word, joined by single spaces.
    ///
    /// Symbols without a Spanish entry are skipped. This implementation never
    /// returns `Err`.
    fn translate(&self, input: UtlDoc) -> Result<HumanText<Spa>> {
        // Spanish rendering of a single UTL symbol, if it has one.
        fn spanish_for(symbol: &str) -> Option<&'static str> {
            let word = match symbol {
                "🙋" => "yo",
                "👤" => "tú",
                "❤️" => "amor",
                "🧠" => "pensar",
                "💭" => "recordar",
                "⏮" => "era",
                "⏺" => "es",
                "⏭" => "será",
                "😊" => "feliz",
                "😢" => "triste",
                "⧖" => ".",
                _ => return None,
            };
            Some(word)
        }

        let rendered: Vec<&str> = input
            .tokens
            .iter()
            .map(String::as_str)
            .filter_map(spanish_for)
            .collect();

        Ok(HumanText {
            _lang: PhantomData,
            text: rendered.join(" "),
        })
    }
}

/// Convenience helper: Translate UTL to Spanish
///
/// Takes an already-tokenized [`UtlDoc`] and returns the typed [`HumanText<Spa>`].
pub fn to_spanish(doc: UtlDoc) -> Result<HumanText<Spa>> {
    let translator = UtlToHuman::<Spa>::new();
    translator.translate(doc)
}
// ---------- FORBIDDEN paths (intentionally UNIMPLEMENTED) ----------
//
// The impls below are deliberately absent, so code that relies on any of them
// will NEVER compile:
// - No impl Translate<HumanText<Eng>, HumanText<Jpn>>
// - No impl Translate<HumanText<Jpn>, HumanText<Eng>>
// - No impl Translate<RawText, HumanText<L>> directly
// - No impl Translate<HumanText<L>, RawText>
//
// If someone tries, the Rust compiler says NO! 🚫

// ---------- Analysis & Storage hooks ----------

/// Inspects the UTL symbols of `doc` (never the original text) and stores the
/// result in `doc.metadata`, replacing whatever was there.
///
/// - `temporal` is `"past"` when `⏮` strictly outnumbers both `⏺` and `⏭`,
///   otherwise `"future"` when `⏭` strictly outnumbers `⏺`, otherwise `"present"`.
/// - `emotion` is `"joy"` if `😊` occurs, else `"sadness"` if `😢` occurs,
///   else `"neutral"`.
/// - `genre` is `"memoir"` when `💭` occurs and `temporal` is `"past"`,
///   else `"unknown"`.
/// - `delay_ms` is always `250`.
///
/// This function always returns `Ok(())`.
pub fn analyze_utl(doc: &mut UtlDoc) -> Result<()> {
    let (mut past, mut present, mut future) = (0usize, 0usize, 0usize);
    let (mut joyful, mut sorrowful, mut recalls) = (false, false, false);

    // One pass over the symbols gathers everything the rules below need.
    for symbol in &doc.tokens {
        match symbol.as_str() {
            "⏮" => past += 1,
            "⏺" => present += 1,
            "⏭" => future += 1,
            "😊" => joyful = true,
            "😢" => sorrowful = true,
            "💭" => recalls = true,
            _ => {}
        }
    }

    let temporal = if past > present && past > future {
        "past"
    } else if future > present {
        "future"
    } else {
        "present"
    };

    // Joy takes precedence when both emotion symbols are present.
    let emotion = if joyful {
        "joy"
    } else if sorrowful {
        "sadness"
    } else {
        "neutral"
    };

    let genre = if recalls && temporal == "past" {
        "memoir"
    } else {
        "unknown"
    };

    doc.metadata = Some(UtlMetadata {
        genre: genre.to_string(),
        temporal: temporal.to_string(),
        emotion: emotion.to_string(),
        delay_ms: 250, // UDC delay
    });

    Ok(())
}

/// Storage hook for MEM|8.
///
/// With the `mem8` feature enabled this prints the token count to stdout;
/// without it the call is a no-op. It returns `Ok(())` in both configurations.
pub fn store_mem8(doc: &UtlDoc) -> Result<()> {
    // TODO: Wire to actual MEM|8
    #[cfg(feature = "mem8")]
    println!("Storing UTL with {} tokens to MEM|8", doc.tokens.len());

    // No-op when MEM|8 not compiled in; the binding only marks `doc` as used.
    #[cfg(not(feature = "mem8"))]
    let _ = doc;

    Ok(())
}

// ---------- One-shot pipeline (the ONLY way) ----------

/// Runs the whole pipeline for target language `L`:
/// raw text → UTL → analysis → storage hook → human text.
///
/// Only compiles for languages `L` for which `UtlToHuman<L>` implements
/// `Translate<UtlDoc, HumanText<L>>`. The first error from any stage is returned.
pub fn process_to_language<L: Language>(raw: &str) -> Result<HumanText<L>>
where
    UtlToHuman<L>: Translate<UtlDoc, HumanText<L>>,
{
    // Step 1: Raw → UTL (mandatory)
    let source = RawText(String::from(raw));
    let mut doc = RawToUtl.translate(source)?;

    // Step 2: Analyze UTL
    analyze_utl(&mut doc)?;

    // Step 3: Store in MEM|8
    store_mem8(&doc)?;

    // Step 4: UTL → Human language
    let renderer = UtlToHuman::<L>(PhantomData);
    renderer.translate(doc)
}

// ---------- Convenience helpers ----------

/// Runs `raw` through the full pipeline and returns the English rendering as a plain `String`.
pub fn to_english(raw: &str) -> Result<String> {
    process_to_language::<Eng>(raw).map(|human| human.text)
}

/// Runs `raw` through the full pipeline and returns the Japanese rendering as a plain `String`.
pub fn to_japanese(raw: &str) -> Result<String> {
    process_to_language::<Jpn>(raw).map(|human| human.text)
}

// ---------- Runtime guard against sneaky bypasses ----------

/// This function will ALWAYS error - it's a honeypot for bad code
///
/// The error message names both languages via [`Language::name`].
pub fn forbid_human_to_human<A: Language, B: Language>() -> Result<()> {
    let source = A::name();
    let target = B::name();
    Err(anyhow!(
        "FORBIDDEN: Direct {} → {} translation! Must go through UTL!",
        source,
        target
    ))
}

// ---------- Example of impossible code ----------
//
// This WILL NOT COMPILE (uncomment to verify):
//
// pub fn bad_translator(eng: HumanText<Eng>) -> HumanText<Jpn> {
//     // ERROR: no impl of Translate<HumanText<Eng>, HumanText<Jpn>>
//     SomeTranslator.translate(eng)  // ← Compile error!
// }

#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `text` and reports whether the past marker `⏮` was emitted.
    fn emits_past(text: &str) -> bool {
        let doc = RawToUtl.translate(RawText(text.into())).unwrap();
        doc.tokens.iter().any(|symbol| symbol == "⏮")
    }

    #[test]
    fn test_enforced_pipeline() {
        // This works - goes through UTL
        let english = to_english("I love you").unwrap();
        assert!(english.contains("I"));
        assert!(english.contains("love"));

        let japanese = to_japanese("I love you").unwrap();
        assert!(japanese.contains("私"));
        assert!(japanese.contains("愛"));
    }

    #[test]
    fn test_utl_analysis() {
        let source = RawText("I remember being happy".into());
        let mut doc = RawToUtl.translate(source).unwrap();
        analyze_utl(&mut doc).unwrap();

        let summary = doc.metadata.unwrap();
        assert_eq!(summary.genre, "memoir");
        assert_eq!(summary.temporal, "past");
    }

    #[test]
    fn test_word_boundaries() {
        // Word boundaries must prevent false matches inside longer words
        // and inside contractions.
        let rejected = [
            ("I saw a wasp", "wasp should not match 'was'"),
            ("I wasn't there", "wasn't should not match 'was'"),
            ("They weren't happy", "weren't should not match 'were'"),
            ("Your wellbeing matters", "wellbeing should not match 'being'"),
        ];
        for (text, why) in rejected.iter() {
            assert!(!emits_past(text), "{}", why);
        }

        // But actual words should still match
        let accepted = [
            ("I was happy", "was should match"),
            ("They were happy", "were should match"),
            ("I am being careful", "being should match"),
        ];
        for (text, why) in accepted.iter() {
            assert!(emits_past(text), "{}", why);
        }
    }

    // This test WILL NOT COMPILE if uncommented:
    // #[test]
    // fn test_forbidden_human_to_human() {
    //     let eng = HumanText::<Eng> { _lang: PhantomData, text: "Hello".into() };
    //     let jpn: HumanText<Jpn> = BadTranslator.translate(eng); // COMPILE ERROR!
    // }
}