/// Genre labels that can be attached to a document.
///
/// `Unknown` is the label used when no genre could be determined.
#[derive(Debug, Clone)]
pub enum DocumentGenre {
    Fiction,
    NonFiction,
    Memoir,
    Letter,
    Diary,
    Poem,
    Essay,
    Musing,
    Recipe,
    List,
    Unknown,
}
/// Time-related facts about a document.
#[derive(Debug, Clone)]
pub struct TemporalContext {
    /// When the document was written, if known. The string format is not
    /// fixed by this type.
    pub creation_date: Option<String>,

    /// The span of time the document talks about, as a pair of strings
    /// (presumably start and end; confirm against producers).
    pub describes_period: Option<(String, String)>,

    /// How much the temporal information should be trusted.
    pub temporal_confidence: f32,

    /// Optional delay value in milliseconds.
    pub memory_delay_ms: Option<u64>,
}
/// Emotional reading of a document.
#[derive(Debug, Clone)]
pub struct EmotionalContext {
    /// Label of the dominant emotion.
    pub primary_emotion: String,

    /// Strength associated with the dominant emotion.
    pub intensity: f32,

    /// Emotion labels paired with a numeric weight.
    pub emotion_blend: Vec<(String, f32)>,
}
/// People connected to a document.
#[derive(Debug, Clone)]
pub struct RelationalContext {
    /// Who wrote the document, if known.
    pub author: Option<String>,

    /// Names mentioned in the document.
    pub subjects: Vec<String>,

    /// Who the document is addressed to, if known.
    pub audience: Option<String>,

    /// Relationship triples. The meaning of each position is not defined by
    /// this type (presumably two parties and a relation; confirm with
    /// producers).
    pub relationships: Vec<(String, String, String)>,
}
/// A text document together with the metadata derived from it.
#[derive(Debug, Clone)]
pub struct UniversalDocument {
    /// The document text the metadata was derived from.
    pub raw_text: String,

    /// Genre label for the text.
    pub genre: DocumentGenre,

    /// Confidence attached to `genre`.
    pub genre_confidence: f32,

    /// Time-related metadata.
    pub temporal: TemporalContext,

    /// Emotion-related metadata.
    pub emotional: EmotionalContext,

    /// People-related metadata.
    pub relational: RelationalContext,

    /// Compact glyph string summarising the fields above.
    pub utl_encoding: String,

    /// Optional numeric signature; its layout is not defined by this type.
    pub wave_signature: Option<Vec<f32>>,
}
impl UniversalDocument {
    /// Guesses the genre of `text` from keyword and layout heuristics.
    ///
    /// Matching is case-insensitive and by substring. Rules are tried in
    /// order and the first hit wins:
    ///
    /// 1. `"once upon a time"` or `"the end"` gives `Fiction`, 0.8
    /// 2. `"dear "` together with `"sincerely"` or `"love,"` gives `Letter`, 0.9
    /// 3. `"ingredients:"` or `"instructions:"` gives `Recipe`, 0.95
    /// 4. more than three distinct date markers give `Diary`, 0.7 when
    ///    `"diary"` or `"journal"` is present, otherwise `Memoir`, 0.6
    /// 5. more than eight lines averaging under 50 bytes give `Poem`, 0.5
    /// 6. anything else gives `Unknown`, 0.1
    ///
    /// Returns the genre together with its confidence.
    pub fn classify_genre(text: &str) -> (DocumentGenre, f32) {
        // Date markers are plain substrings: "may" also matches "maybe", and
        // "19"/"20" match any text containing those digit pairs.
        const DATE_MARKERS: [&str; 21] = [
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
            "1900",
            "1901",
            "1902",
            "1903",
            "1904",
            "1905",
            "1906",
            "19",
            "20",
        ];

        let lower = text.to_lowercase();

        if lower.contains("once upon a time") || lower.contains("the end") {
            return (DocumentGenre::Fiction, 0.8);
        }

        let has_sign_off = lower.contains("sincerely") || lower.contains("love,");
        if lower.contains("dear ") && has_sign_off {
            return (DocumentGenre::Letter, 0.9);
        }

        if lower.contains("ingredients:") || lower.contains("instructions:") {
            return (DocumentGenre::Recipe, 0.95);
        }

        // Each marker counts at most once, however often it occurs.
        let marker_hits = DATE_MARKERS
            .iter()
            .filter(|marker| lower.contains(**marker))
            .count();
        if marker_hits > 3 {
            let is_diary = lower.contains("diary") || lower.contains("journal");
            return if is_diary {
                (DocumentGenre::Diary, 0.7)
            } else {
                (DocumentGenre::Memoir, 0.6)
            };
        }

        // Line lengths are measured in bytes, on the original (not lowercased) text.
        let (line_count, total_bytes) = text
            .lines()
            .fold((0usize, 0usize), |(count, bytes), line| {
                (count + 1, bytes + line.len())
            });
        // `line_count > 8` is checked first, so the division never sees zero.
        if line_count > 8 && total_bytes / line_count < 50 {
            return (DocumentGenre::Poem, 0.5);
        }

        (DocumentGenre::Unknown, 0.1)
    }

    /// Scans `text` for years between 1900 and 2099 and summarises them.
    ///
    /// Each whitespace-separated word has non-numeric characters trimmed from
    /// both ends before it is parsed, so `"1927."` and `"(1931)"` count while
    /// `"19-27"` does not.
    ///
    /// When at least one year is found, `describes_period` holds the earliest
    /// and latest year as strings and `temporal_confidence` is 0.5. Otherwise
    /// `describes_period` is `None` and the confidence is 0.1.
    /// `creation_date` is always `None` and `memory_delay_ms` is always
    /// `Some(250)`.
    pub fn extract_temporal(text: &str) -> TemporalContext {
        let years: Vec<i32> = text
            .split_whitespace()
            .filter_map(|word| {
                word.trim_matches(|c: char| !c.is_numeric())
                    .parse::<i32>()
                    .ok()
            })
            .filter(|year| (1900..=2099).contains(year))
            .collect();

        // The year range used to be computed and then thrown away, leaving
        // `describes_period` permanently empty; it is now reported.
        let describes_period = match (years.iter().min(), years.iter().max()) {
            (Some(earliest), Some(latest)) => Some((earliest.to_string(), latest.to_string())),
            _ => None,
        };
        let temporal_confidence = if describes_period.is_some() { 0.5 } else { 0.1 };

        TemporalContext {
            creation_date: None,
            describes_period,
            temporal_confidence,
            // Fixed value; nothing in this function measures a delay.
            memory_delay_ms: Some(250),
        }
    }

    /// Scores `text` against four small emotion word lists.
    ///
    /// Matching is case-insensitive and by substring (so `"mad"` also matches
    /// inside `"made"`). Each listed word counts once if present. The emotion
    /// with the most hits becomes `primary_emotion`; ties go to the earlier
    /// list in the order joy, sadness, anger, fear. `intensity` is that
    /// emotion's share of all hits, and `emotion_blend` lists every emotion
    /// with at least one hit alongside its share.
    ///
    /// With no hits at all the result is `"😐"` at intensity 0.5 and an empty
    /// blend.
    pub fn extract_emotional(text: &str) -> EmotionalContext {
        const LEXICON: [(&str, &[&str]); 4] = [
            (
                "😊",
                &["happy", "joy", "love", "wonderful", "beautiful", "excited"],
            ),
            ("😢", &["sad", "died", "loss", "grief", "mourn", "tears"]),
            ("😡", &["angry", "furious", "mad", "hate", "rage"]),
            ("😨", &["afraid", "scared", "fear", "terror", "worried"]),
        ];

        let lower = text.to_lowercase();

        let mut hits: Vec<(&str, usize)> = LEXICON
            .iter()
            .map(|(glyph, words)| {
                let count = words.iter().filter(|word| lower.contains(**word)).count();
                (*glyph, count)
            })
            .collect();
        // Descending by count. The sort is stable, so ties keep lexicon order.
        // Comparing integers avoids the fallible float comparison.
        hits.sort_by(|a, b| b.1.cmp(&a.1));

        let total: usize = hits.iter().map(|(_, count)| *count).sum();
        if total == 0 {
            return EmotionalContext {
                primary_emotion: "😐".to_string(),
                intensity: 0.5,
                emotion_blend: Vec::new(),
            };
        }

        let total = total as f32;
        // `hits` always holds one entry per lexicon row, so index 0 exists.
        let (top_glyph, top_count) = hits[0];
        let emotion_blend = hits
            .iter()
            .filter(|(_, count)| *count > 0)
            .map(|(glyph, count)| (glyph.to_string(), *count as f32 / total))
            .collect();

        EmotionalContext {
            primary_emotion: top_glyph.to_string(),
            intensity: top_count as f32 / total,
            emotion_blend,
        }
    }

    /// Collects the known names mentioned in `text`.
    ///
    /// Each whitespace-separated word has non-alphabetic characters trimmed
    /// from both ends and is then compared, case-sensitively, with a fixed
    /// list of names. Matches are returned in order of first appearance
    /// without duplicates. `author`, `audience` and `relationships` are left
    /// empty.
    pub fn extract_relational(text: &str) -> RelationalContext {
        // Every entry is capitalised and at least three bytes long, so no
        // separate length or capitalisation check is needed before lookup.
        const KNOWN_NAMES: [&str; 7] = ["Mike", "Alice", "Dad", "Mom", "Jody", "Bill", "Maude"];

        let mut subjects: Vec<String> = Vec::new();
        for word in text.split_whitespace() {
            let name = word.trim_matches(|c: char| !c.is_alphabetic());
            let already_seen = subjects.iter().any(|seen| seen.as_str() == name);
            if KNOWN_NAMES.contains(&name) && !already_seen {
                subjects.push(name.to_string());
            }
        }

        RelationalContext {
            author: None,
            subjects,
            audience: None,
            relationships: Vec::new(),
        }
    }

    /// Builds the glyph summary for this document.
    ///
    /// The string is assembled in this order: a genre glyph, `⏰` when
    /// `temporal_confidence` exceeds 0.3, the primary emotion, `👥` when any
    /// subject is present, and `⧖` when `memory_delay_ms` exceeds 500.
    pub fn to_theoglyphic(&self) -> String {
        // Listed exhaustively so that a new genre forces a decision here.
        let genre_glyph = match self.genre {
            DocumentGenre::Fiction => "📖",
            DocumentGenre::Memoir => "🧠💭",
            DocumentGenre::Letter => "✉️",
            DocumentGenre::Diary => "📔",
            DocumentGenre::Poem => "🎵",
            DocumentGenre::NonFiction
            | DocumentGenre::Essay
            | DocumentGenre::Musing
            | DocumentGenre::Recipe
            | DocumentGenre::List
            | DocumentGenre::Unknown => "📄",
        };

        let mut utl = String::from(genre_glyph);
        if self.temporal.temporal_confidence > 0.3 {
            utl.push('⏰');
        }
        utl.push_str(&self.emotional.primary_emotion);
        if !self.relational.subjects.is_empty() {
            utl.push('👥');
        }
        if matches!(self.temporal.memory_delay_ms, Some(delay) if delay > 500) {
            utl.push('⧖');
        }
        utl
    }

    /// Builds a document from `text` by running every extractor on it and
    /// then storing the resulting glyph summary in `utl_encoding`.
    ///
    /// `wave_signature` is left as `None`.
    pub fn from_text(text: String) -> Self {
        let (genre, genre_confidence) = Self::classify_genre(&text);
        let temporal = Self::extract_temporal(&text);
        let emotional = Self::extract_emotional(&text);
        let relational = Self::extract_relational(&text);

        let mut doc = UniversalDocument {
            raw_text: text,
            genre,
            genre_confidence,
            temporal,
            emotional,
            relational,
            utl_encoding: String::new(),
            wave_signature: None,
        };
        // The summary reads the fields set above, so it is filled in last.
        doc.utl_encoding = doc.to_theoglyphic();
        doc
    }
}
/// Cleans extracted text and turns what is left into a [`UniversalDocument`].
///
/// The markers `CHNKINK`, `TEXTTEXT`, `FDPP` and `STSH` are removed wherever
/// they occur (presumably container residue from a publisher file; confirm
/// against real inputs). The text is then split into lines, and a line is
/// kept only when both of these hold:
///
/// * it contains no NUL and no U+FFFD replacement character, and
/// * its ASCII-graphic and whitespace characters number more than half of
///   its byte length (so blank lines are dropped too).
///
/// Surviving lines are joined with `\n` and passed to
/// [`UniversalDocument::from_text`].
pub fn publisher_to_universal(pub_text: &str) -> UniversalDocument {
    const MARKERS: [&str; 4] = ["CHNKINK", "TEXTTEXT", "FDPP", "STSH"];

    /// Decides whether one line looks like readable text.
    fn is_readable(line: &str) -> bool {
        // The previous check was `!line.contains("")`. Every string contains
        // the empty pattern, so it rejected every line and the document was
        // always built from an empty string.
        // NOTE(review): the character originally intended inside the quotes
        // is not recoverable; NUL and U+FFFD are assumed here - confirm.
        let has_binary_residue = line.contains(|c: char| c == '\0' || c == '\u{FFFD}');

        // Character count is compared against byte length, as before.
        let printable = line
            .chars()
            .filter(|c| c.is_ascii_graphic() || c.is_whitespace())
            .count();

        !has_binary_residue && printable > line.len() / 2
    }

    // Markers are stripped in list order, before the text is split into lines.
    let stripped = MARKERS
        .iter()
        .fold(pub_text.to_string(), |text, marker| text.replace(*marker, ""));

    let cleaned = stripped
        .lines()
        .filter(|line| is_readable(line))
        .collect::<Vec<_>>()
        .join("\n");

    UniversalDocument::from_text(cleaned)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fiction and letter samples are recognised with the expected minimum
    /// confidence.
    #[test]
    fn test_genre_classification() {
        let (fiction_genre, fiction_conf) =
            UniversalDocument::classify_genre("Once upon a time there was a princess. The end.");
        assert!(matches!(fiction_genre, DocumentGenre::Fiction));
        assert!(fiction_conf > 0.7);

        let (letter_genre, letter_conf) =
            UniversalDocument::classify_genre("Dear Mom,\nI hope you are well.\nLove, Jody");
        assert!(matches!(letter_genre, DocumentGenre::Letter));
        assert!(letter_conf > 0.8);
    }

    /// Joyful and sorrowful samples map to their respective glyphs.
    #[test]
    fn test_emotional_extraction() {
        let joyful =
            UniversalDocument::extract_emotional("I'm so happy and excited about this wonderful day!");
        assert_eq!(joyful.primary_emotion, "😊");

        let sorrowful =
            UniversalDocument::extract_emotional("She died and we all mourned her loss with tears.");
        assert_eq!(sorrowful.primary_emotion, "😢");
    }

    /// A short letter yields an encoding with the letter, joy and people
    /// glyphs.
    #[test]
    fn test_theoglyphic_encoding() {
        let sample = String::from("Dear Alice, I remember our happy times in 1927. Love, Mike");
        let encoding = UniversalDocument::from_text(sample).utl_encoding;
        assert!(encoding.contains("✉️"));
        assert!(encoding.contains("😊"));
        assert!(encoding.contains("👥"));
    }
}