use crate::error::{LmmError, Result};
use crate::lexicon::word_tone;
/// Pool of transitive verbs, each paired with a numeric score.
///
/// `offset_by_tone` treats the second tuple element as the word's tone and ranks
/// entries by distance to a target tone. The words are also excluded from the
/// keywords returned by `keywords_from_text`.
// NOTE(review): how the scores were derived is not visible in this section —
// presumably precomputed tone values; confirm before editing them by hand.
static TRANSITIVE_VERBS: &[(&str, f64)] = &[
("reveals", 113.1),
("encodes", 107.7),
("governs", 111.3),
("shapes", 106.8),
("defines", 107.8),
("captures", 109.3),
("reflects", 109.6),
("transforms", 115.8),
("determines", 113.5),
("expresses", 116.4),
("describes", 109.6),
("manifests", 112.8),
("illuminates", 115.3),
("compresses", 112.5),
("represents", 116.9),
("unveils", 109.8),
("generates", 114.5),
("produces", 112.0),
("enables", 106.8),
("connects", 109.0),
];
/// Pool of linking verbs with their tone scores.
///
/// Read by sentence template 2 of `SentenceGenerator::generate_variant` and
/// excluded from the keywords returned by `keywords_from_text`.
static LINKING_VERBS: &[(&str, f64)] = &[
("is", 109.5),
("are", 107.7),
("forms", 108.3),
("becomes", 111.9),
("remains", 111.5),
("holds", 107.0),
];
/// Pool of nouns used in subject position, each paired with a tone score.
///
/// Every template in `SentenceGenerator::generate_variant` draws its subject
/// from this table through `offset_by_tone`.
static SUBJECT_NOUNS: &[(&str, f64)] = &[
("mathematics", 112.4),
("geometry", 113.2),
("physics", 108.5),
("logic", 107.3),
("algebra", 105.3),
("calculus", 108.8),
("symmetry", 116.7),
("entropy", 113.7),
("topology", 116.8),
("probability", 114.2),
("computation", 116.8),
("information", 115.7),
("simulation", 116.0),
("equation", 111.0),
("analysis", 110.4),
("recursion", 113.3),
("resonance", 113.3),
("frequency", 115.4),
("wavelength", 113.8),
("dimension", 113.8),
("structure", 115.3),
("pattern", 110.3),
("gradient", 110.3),
("divergence", 113.0),
("integration", 115.4),
("transformation", 119.5),
];
/// Pool of nouns used in object position, each paired with a tone score.
///
/// Every template in `SentenceGenerator::generate_variant` draws its object
/// from this table.
// NOTE(review): "symmetry" is also listed in `SUBJECT_NOUNS`, so one sentence can
// use it as both subject and object — confirm that this is intended.
static OBJECT_NOUNS: &[(&str, f64)] = &[
("reality", 112.5),
("truth", 110.6),
("complexity", 116.3),
("order", 108.2),
("chaos", 103.5),
("harmony", 109.0),
("existence", 114.0),
("nature", 109.2),
("matter", 108.2),
("energy", 107.2),
("time", 108.2),
("space", 107.5),
("motion", 107.5),
("change", 107.2),
("balance", 106.3),
("infinity", 111.9),
("symmetry", 116.7),
("unity", 111.7),
("identity", 112.6),
("causality", 113.0),
("meaning", 108.7),
("knowledge", 112.0),
("perception", 113.2),
("boundaries", 115.1),
("limits", 107.5),
("foundations", 117.7),
];
/// Pool of adjectives with their tone scores.
///
/// Used by the sentence templates that include an adjective and excluded from
/// the keywords returned by `keywords_from_text`.
static ADJECTIVES: &[(&str, f64)] = &[
("fundamental", 116.1),
("mathematical", 114.5),
("universal", 115.0),
("infinite", 110.8),
("precise", 111.8),
("elegant", 109.6),
("structural", 116.0),
("invariant", 113.0),
("dynamic", 109.7),
("recursive", 113.8),
("continuous", 115.4),
("discrete", 112.3),
("deterministic", 120.0),
("probabilistic", 117.5),
("axiomatic", 116.0),
("abstract", 109.6),
("emergent", 111.0),
("coherent", 112.3),
("symmetric", 117.8),
("bounded", 109.8),
];
/// Sentence-opening connective phrases, each already ending in a comma.
///
/// Sentence template 4 of `SentenceGenerator::generate_variant` selects one by
/// index modulo the table length and places it at the start of the sentence.
static SENTENCE_CONNECTORS: &[&str] = &[
"Furthermore,",
"Moreover,",
"Indeed,",
"Consequently,",
"In essence,",
"At its core,",
"As a result,",
"Fundamentally,",
"More precisely,",
"By extension,",
"Through this lens,",
"In this framework,",
];
/// Words placed between two noun phrases by sentence templates 2 and 5 of
/// `SentenceGenerator::generate_variant`, selected by index modulo the table
/// length. The list mixes true prepositions with participles such as
/// "underlying" and "governing".
static PREPOSITIONS_RICH: &[&str] = &[
"of",
"within",
"beyond",
"through",
"across",
"beneath",
"inside",
"underlying",
"pervading",
"governing",
];
/// Computes the "tone" of a string: the arithmetic mean of the byte values of
/// its ASCII alphabetic characters.
///
/// Every other byte (digits, punctuation, whitespace, non-ASCII) is ignored.
/// When `s` contains no ASCII letter at all the fallback value `110.0` is
/// returned.
fn text_tone(s: &str) -> f64 {
    let mut total = 0.0_f64;
    let mut letters = 0_usize;
    for byte in s.bytes() {
        if byte.is_ascii_alphabetic() {
            total += f64::from(byte);
            letters += 1;
        }
    }
    if letters == 0 {
        110.0
    } else {
        total / letters as f64
    }
}
/// Picks a word from `pool` by closeness of its tone to `target`.
///
/// The pool entries are ordered by ascending `|tone - target|` (entries with
/// equal distance keep their pool order because the sort is stable) and the
/// entry at position `offset % pool.len()` of that ordering is returned.
/// An empty pool yields the empty string.
fn offset_by_tone<'a>(pool: &'a [(&'a str, f64)], target: f64, offset: usize) -> &'a str {
    if pool.is_empty() {
        return "";
    }
    let mut ranked: Vec<&(&str, f64)> = pool.iter().collect();
    ranked.sort_by(|lhs, rhs| {
        let lhs_gap = (lhs.1 - target).abs();
        let rhs_gap = (rhs.1 - target).abs();
        // Incomparable distances (NaN) are treated as ties.
        lhs_gap
            .partial_cmp(&rhs_gap)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    ranked[offset % ranked.len()].0
}
/// Same selection as `offset_by_tone`, but entries whose word equals `exclude`
/// are removed from the pool before ranking.
///
/// The remaining entries are ordered by ascending `|tone - target|` (stable, so
/// ties keep pool order) and the one at `offset % remaining_len` is returned.
/// If nothing remains after the exclusion the empty string is returned.
fn offset_by_tone_not<'a>(
    pool: &'a [(&'a str, f64)],
    target: f64,
    offset: usize,
    exclude: &str,
) -> &'a str {
    let mut ranked: Vec<&(&str, f64)> = pool
        .iter()
        .filter(|entry| entry.0 != exclude)
        .collect();
    if ranked.is_empty() {
        return "";
    }
    ranked.sort_by(|lhs, rhs| {
        let lhs_gap = (lhs.1 - target).abs();
        let rhs_gap = (rhs.1 - target).abs();
        // Incomparable distances (NaN) are treated as ties.
        lhs_gap
            .partial_cmp(&rhs_gap)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    ranked[offset % ranked.len()].0
}
/// Splits `text` into sentences on `.`, `!` and `?`.
///
/// Rules:
/// * A `.` between two ASCII digits (`3.14`) is not a boundary.
/// * A `.` immediately followed by an ASCII lowercase letter (`e.g.this`,
///   `example.com`) is not a boundary.
/// * Closing quotes/brackets and further terminators that directly follow the
///   boundary (`."`, `.)`, `?!`, `...`) are kept with the sentence they end,
///   so they are neither orphaned nor glued to the start of the next sentence.
/// * Fragments with fewer than five whitespace-separated words are discarded;
///   this also applies to the trailing text after the last terminator.
///
/// Returned sentences are trimmed of surrounding whitespace.
fn split_into_sentences(text: &str) -> Vec<String> {
    const MIN_WORDS: usize = 5;

    fn is_terminator(c: char) -> bool {
        matches!(c, '.' | '!' | '?')
    }

    fn is_closer(c: char) -> bool {
        matches!(c, '"' | '\'' | ')' | ']' | '\u{201D}' | '\u{2019}')
    }

    // Emits the buffered fragment if it is long enough, then resets the buffer.
    fn flush(buffer: &mut String, out: &mut Vec<String>) {
        let candidate = buffer.trim();
        if candidate.split_whitespace().count() >= MIN_WORDS {
            out.push(candidate.to_string());
        }
        buffer.clear();
    }

    let chars: Vec<char> = text.chars().collect();
    let len = chars.len();
    let mut sentences: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut pos = 0;

    while pos < len {
        let ch = chars[pos];
        let mut next = pos + 1;
        current.push(ch);

        if is_terminator(ch) {
            let follower = chars.get(next).copied();
            let inside_number = ch == '.'
                && pos > 0
                && chars[pos - 1].is_ascii_digit()
                && matches!(follower, Some(c) if c.is_ascii_digit());
            let inside_token = ch == '.' && matches!(follower, Some(c) if c.is_ascii_lowercase());

            if !(inside_number || inside_token) {
                // Keep trailing punctuation such as `"`, `)` or a second `!`
                // attached to the sentence it closes.
                while let Some(&c) = chars.get(next) {
                    if !(is_terminator(c) || is_closer(c)) {
                        break;
                    }
                    current.push(c);
                    next += 1;
                }
                flush(&mut current, &mut sentences);
            }
        }

        pos = next;
    }

    flush(&mut current, &mut sentences);
    sentences
}
/// Extracts up to `n` keywords from `text`.
///
/// Each whitespace-separated token is reduced to its ASCII letters and
/// lowercased. A token is kept when the cleaned form has at least four letters,
/// is not listed in `TRANSITIVE_VERBS`, `LINKING_VERBS`, `ADJECTIVES` or the
/// built-in filler-word list, and has not been seen earlier in the text.
/// Survivors are ordered by `word_tone`, highest first (equal tones keep their
/// order of first appearance), and the first `n` are returned.
fn keywords_from_text(text: &str, n: usize) -> Vec<String> {
    use std::collections::HashSet;

    const FILLER_WORDS: &[&str] = &[
        "this", "that", "their", "also", "with", "from", "into", "will", "have", "been", "more",
        "very", "most", "such", "than", "when", "early", "first", "some", "only", "both", "each",
        "many", "other", "modern", "ancient",
    ];

    // Words that must never be reported as keywords.
    let mut excluded: HashSet<&str> = HashSet::new();
    excluded.extend(TRANSITIVE_VERBS.iter().map(|&(word, _)| word));
    excluded.extend(LINKING_VERBS.iter().map(|&(word, _)| word));
    excluded.extend(ADJECTIVES.iter().map(|&(word, _)| word));
    excluded.extend(FILLER_WORDS.iter().copied());

    let mut already_seen: HashSet<String> = HashSet::new();
    let mut ranked: Vec<(f64, String)> = Vec::new();
    for raw in text.split_whitespace() {
        let clean: String = raw
            .chars()
            .filter(char::is_ascii_alphabetic)
            .map(|c| c.to_ascii_lowercase())
            .collect();
        if clean.len() < 4 || excluded.contains(clean.as_str()) {
            continue;
        }
        if !already_seen.insert(clean.clone()) {
            continue;
        }
        ranked.push((word_tone(&clean), clean));
    }

    // Highest tone first; incomparable tones (NaN) are treated as ties.
    ranked.sort_by(|lhs, rhs| {
        rhs.0
            .partial_cmp(&lhs.0)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    ranked.truncate(n);
    ranked.into_iter().map(|(_, word)| word).collect()
}
/// Derives a deterministic number from a seed string.
///
/// Each byte is multiplied by its position plus 31 and the products are summed.
/// All arithmetic wraps, so the function never panics on overflow. The empty
/// string hashes to `0`.
fn seed_hash(s: &str) -> usize {
    let mut hash = 0usize;
    for (position, byte) in s.bytes().enumerate() {
        let weight = position.wrapping_add(31);
        hash = hash.wrapping_add(usize::from(byte).wrapping_mul(weight));
    }
    hash
}
/// Builds single sentences from fixed templates, choosing words from the
/// module's word tables according to a seed string.
pub struct SentenceGenerator {
/// Stored by `new`; not read by the methods visible in this section.
pub iterations: usize,
/// Stored by `new`; not read by the methods visible in this section.
pub depth: usize,
}
impl SentenceGenerator {
    /// Creates a generator that stores `iterations` and `depth` unchanged.
    pub fn new(iterations: usize, depth: usize) -> Self {
        Self { iterations, depth }
    }

    /// Generates one sentence for `seed` using template `variant % 6`.
    ///
    /// Word choice is deterministic. The tone of `seed` (`text_tone`) sets the
    /// target tone for every word pool, and `variant` plus `seed_hash(seed)`
    /// sets the offset into each tone-ranked pool. The sentence topic is the
    /// first keyword extracted from `seed`, or `"reality"` when there is none.
    ///
    /// # Errors
    ///
    /// The body shown here always returns `Ok`.
    pub fn generate_variant(&self, seed: &str, variant: usize) -> Result<String> {
        let tone = text_tone(seed);
        let sh = seed_hash(seed);
        let keywords = keywords_from_text(seed, 3);
        let topic = keywords.first().map(|s| s.as_str()).unwrap_or("reality");
        let v = variant.wrapping_add(sh);
        // `v` may land anywhere in the `usize` range, so every derived offset
        // must wrap as well; a plain `v + k` panics in overflow-checked builds
        // when `v` is close to `usize::MAX`.
        let at = |step: usize| v.wrapping_add(step);
        let sentence = match variant % 6 {
            0 => {
                let subj = offset_by_tone(SUBJECT_NOUNS, tone, v);
                let verb = offset_by_tone(TRANSITIVE_VERBS, tone, at(1));
                let adj = offset_by_tone(ADJECTIVES, tone - 2.0, at(2));
                let obj = offset_by_tone(OBJECT_NOUNS, tone + 1.0, at(3));
                format!(
                    "{} {} the {} {} of the {}.",
                    capitalize(subj),
                    verb,
                    adj,
                    obj,
                    topic
                )
            }
            1 => {
                let adj = offset_by_tone(ADJECTIVES, tone, v);
                let subj = offset_by_tone(SUBJECT_NOUNS, tone + 2.0, at(1));
                let verb = offset_by_tone(TRANSITIVE_VERBS, tone + 1.0, at(2));
                let obj = offset_by_tone(OBJECT_NOUNS, tone - 2.0, at(3));
                format!("The {} {} {} {}.", adj, subj, verb, obj)
            }
            2 => {
                let subj = offset_by_tone(SUBJECT_NOUNS, tone + 1.5, v);
                let link = offset_by_tone(LINKING_VERBS, tone, at(1));
                let adj = offset_by_tone(ADJECTIVES, tone + 3.0, at(2));
                let prep = PREPOSITIONS_RICH[v % PREPOSITIONS_RICH.len()];
                // The topic already appears in this template, so it must not
                // be repeated as the object.
                let obj = offset_by_tone_not(OBJECT_NOUNS, tone - 1.0, at(3), topic);
                format!(
                    "{} {} the {} {} {} {}.",
                    capitalize(subj),
                    link,
                    adj,
                    topic,
                    prep,
                    obj
                )
            }
            3 => {
                let subj = offset_by_tone(SUBJECT_NOUNS, tone, at(1));
                let verb = offset_by_tone(TRANSITIVE_VERBS, tone + 2.0, at(2));
                let obj = offset_by_tone(OBJECT_NOUNS, tone, at(3));
                format!("The {} of {} {} {}.", subj, topic, verb, obj)
            }
            4 => {
                let connector = SENTENCE_CONNECTORS[v % SENTENCE_CONNECTORS.len()];
                let adj = offset_by_tone(ADJECTIVES, tone - 1.0, at(2));
                let subj = offset_by_tone(SUBJECT_NOUNS, tone + 1.0, at(3));
                let verb = offset_by_tone(TRANSITIVE_VERBS, tone, at(4));
                let obj = offset_by_tone(OBJECT_NOUNS, tone + 2.0, at(5));
                format!("{} the {} {} {} {}.", connector, adj, subj, verb, obj)
            }
            _ => {
                let subj = offset_by_tone(SUBJECT_NOUNS, tone + 3.0, at(1));
                let verb = offset_by_tone(TRANSITIVE_VERBS, tone + 1.5, at(2));
                let adj = offset_by_tone(ADJECTIVES, tone - 3.0, at(3));
                let obj = offset_by_tone(OBJECT_NOUNS, tone, at(4));
                let prep = PREPOSITIONS_RICH[at(2) % PREPOSITIONS_RICH.len()];
                format!(
                    "{} {} {} {} {} {}.",
                    capitalize(subj),
                    verb,
                    adj,
                    obj,
                    prep,
                    topic
                )
            }
        };
        Ok(sentence)
    }

    /// Generates the sentence for template 0; shorthand for
    /// `generate_variant(seed, 0)`.
    pub fn generate(&self, seed: &str) -> Result<String> {
        self.generate_variant(seed, 0)
    }
}
/// Returns `s` with its first character converted to uppercase and the rest
/// left untouched.
///
/// The uppercase mapping may expand to several characters (for example `ß`
/// becomes `SS`). An empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        out.extend(first.to_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
/// Heuristically decides whether a sentence contains a verb.
///
/// `lower` is matched as-is against lowercase markers, so callers are expected
/// to pass already-lowercased text. The sentence counts as having a verb when
/// either
/// * it contains one of the space-delimited marker words below (a marker at
///   the very start or end of the text, or next to punctuation, does not
///   match), or
/// * some token, after stripping non-letters from both ends, is longer than
///   four bytes and ends in `ed`, or longer than six bytes and ends in `ing`.
fn sentence_has_verb(lower: &str) -> bool {
    // Auxiliaries and common present-tense verbs.
    const PRESENT_MARKERS: &[&str] = &[
        " is ",
        " are ",
        " was ",
        " were ",
        " has ",
        " have ",
        " supports ",
        " provides ",
        " enables ",
        " uses ",
        " describes ",
        " represents ",
        " includes ",
        " can ",
        " will ",
        " does ",
        " do ",
        " spans ",
        " emphasizes ",
        " creates ",
        " allows ",
        " helps ",
        " makes ",
        " runs ",
        " enforces ",
        " prevents ",
        " reduces ",
        " increases ",
        " remains ",
        " requires ",
        " relies ",
        " acts ",
        " forms ",
    ];
    // Mostly irregular past-tense forms, plus a few extra verbs.
    const OTHER_MARKERS: &[&str] = &[
        " built ",
        " made ",
        " went ",
        " came ",
        " gave ",
        " took ",
        " found ",
        " knew ",
        " thought ",
        " brought ",
        " left ",
        " became ",
        " saw ",
        " led ",
        " kept ",
        " held ",
        " told ",
        " got ",
        " set ",
        " put ",
        " let ",
        " cut ",
        " hit ",
        " ran ",
        " stood ",
        " lost ",
        " won ",
        " fell ",
        " grew ",
        " bore ",
        " drew ",
        " rose ",
        " wore ",
        " spoke ",
        " wrote ",
        " chose ",
        " drove ",
        " began ",
        " swam ",
        " flew ",
        " threw ",
        " struggled ",
        " struggle ",
        " replicate ",
        " align ",
    ];

    let has_marker = PRESENT_MARKERS
        .iter()
        .chain(OTHER_MARKERS.iter())
        .any(|&marker| lower.contains(marker));
    if has_marker {
        return true;
    }

    // Fall back to suffix shapes typical of regular past tense and participles.
    lower.split_whitespace().any(|token| {
        let core = token.trim_matches(|c: char| !c.is_ascii_alphabetic());
        let looks_past = core.len() > 4 && core.ends_with("ed");
        let looks_progressive = core.len() > 6 && core.ends_with("ing");
        looks_past || looks_progressive
    })
}
/// Extractive summarizer: scores the sentences of a text and returns the
/// highest-scoring ones.
pub struct TextSummarizer {
/// Maximum number of sentences a summary may contain.
pub sentence_count: usize,
/// Stored by `new`; not read by the methods visible in this section.
pub iterations: usize,
/// Stored by `new`; not read by the methods visible in this section.
pub depth: usize,
}
impl TextSummarizer {
    /// Creates a summarizer that returns at most `sentence_count` sentences.
    /// `iterations` and `depth` are stored unchanged.
    pub fn new(sentence_count: usize, iterations: usize, depth: usize) -> Self {
        Self {
            sentence_count,
            iterations,
            depth,
        }
    }

    /// Summarizes `text` without any query bias; equivalent to
    /// `summarize_with_query(text, "")`.
    pub fn summarize(&self, text: &str) -> Result<Vec<String>> {
        self.summarize_with_query(text, "")
    }

    /// Selects up to `sentence_count` sentences from `text`, returned in their
    /// original order with near-duplicates removed.
    ///
    /// When the text has no more sentences than requested, all of them are
    /// returned (deduplicated). Otherwise each sentence gets a score made of:
    /// relative length (capped at 2.5), a bonus for the first sentence, a
    /// penalty for deviating from the tone of the whole text, a penalty for
    /// more than three commas, a bonus/penalty for containing/lacking a verb,
    /// and 0.8 per query word (longer than three bytes, lowercased) found in
    /// the sentence.
    ///
    /// # Errors
    ///
    /// Returns `LmmError::Perception` when `text` contains no sentence that
    /// `split_into_sentences` accepts.
    pub fn summarize_with_query(&self, text: &str, query: &str) -> Result<Vec<String>> {
        let sentences = split_into_sentences(text);
        if sentences.is_empty() {
            return Err(LmmError::Perception(
                "No complete sentences found in input.".into(),
            ));
        }
        if sentences.len() <= self.sentence_count {
            return Ok(deduplicate_sentences(sentences));
        }

        let global_tone = text_tone(text);
        let total_bytes: usize = sentences.iter().map(String::len).sum();
        let avg_len = total_bytes as f64 / sentences.len() as f64;
        let query_keywords: Vec<String> = query
            .split_whitespace()
            .filter(|word| word.len() > 3)
            .map(str::to_lowercase)
            .collect();

        // Score of one sentence; `index` is its position in the text.
        let score = |index: usize, sentence: &str| -> f64 {
            let lower = sentence.to_lowercase();
            let tone_delta = (text_tone(sentence) - global_tone).abs();
            let len_score = (sentence.len() as f64 / avg_len.max(1.0)).min(2.5);
            let early_bonus = if index == 0 { 2.5 } else { 0.0 };
            let comma_count = sentence.matches(',').count();
            let comma_penalty = if comma_count > 3 {
                (comma_count as f64 - 3.0) * 1.8
            } else {
                0.0
            };
            let verb_bonus = if sentence_has_verb(&lower) { 1.2 } else { -3.0 };
            // With no query keywords the hit count is zero, so the bonus is 0.0.
            let hits = query_keywords
                .iter()
                .filter(|kw| lower.contains(kw.as_str()))
                .count();
            let relevance_bonus = hits as f64 * 0.8;
            len_score * 1.0 + early_bonus - tone_delta * 0.3 - comma_penalty
                + verb_bonus
                + relevance_bonus
        };

        let mut ranked: Vec<(usize, f64)> = Vec::with_capacity(sentences.len());
        for (index, sentence) in sentences.iter().enumerate() {
            ranked.push((index, score(index, sentence.as_str())));
        }
        // Best score first; incomparable scores are treated as ties.
        ranked.sort_by(|lhs, rhs| {
            rhs.1
                .partial_cmp(&lhs.1)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        ranked.truncate(self.sentence_count);

        // Restore document order. Indices come from `enumerate`, so they are
        // already unique.
        let mut picked: Vec<usize> = ranked.iter().map(|&(index, _)| index).collect();
        picked.sort_unstable();

        let ordered: Vec<String> = picked
            .into_iter()
            .map(|index| sentences[index].clone())
            .collect();
        Ok(deduplicate_sentences(ordered))
    }
}
/// Removes near-duplicate sentences while preserving order.
///
/// Two sentences count as duplicates when their first six whitespace-separated
/// words, joined by single spaces and lowercased, are identical. The first
/// occurrence is kept and later ones are dropped.
fn deduplicate_sentences(sentences: Vec<String>) -> Vec<String> {
    let mut known_openings = std::collections::HashSet::new();
    let mut unique = Vec::new();
    for sentence in sentences {
        let opening: Vec<&str> = sentence.split_whitespace().take(6).collect();
        let fingerprint = opening.join(" ").to_lowercase();
        if known_openings.insert(fingerprint) {
            unique.push(sentence);
        }
    }
    unique
}
/// Builds a paragraph by generating several sentences from one seed.
pub struct ParagraphGenerator {
/// Number of sentences in each generated paragraph.
pub sentence_count: usize,
/// Forwarded to `SentenceGenerator::new` by `generate`.
pub iterations: usize,
/// Forwarded to `SentenceGenerator::new` by `generate`.
pub depth: usize,
}
impl ParagraphGenerator {
    /// Creates a paragraph generator producing `sentence_count` sentences per
    /// paragraph. `iterations` and `depth` are stored unchanged.
    pub fn new(sentence_count: usize, iterations: usize, depth: usize) -> Self {
        Self {
            sentence_count,
            iterations,
            depth,
        }
    }

    /// Generates `sentence_count` sentences and joins them with single spaces.
    ///
    /// Sentence `i` uses template variant `i`. The first sentence is seeded
    /// with `seed` itself; sentence `i > 0` is seeded with `seed` followed by
    /// the `i`-th keyword extracted from `seed`, or with plain `seed` when no
    /// such keyword exists.
    ///
    /// # Errors
    ///
    /// Propagates any error from `SentenceGenerator::generate_variant`.
    pub fn generate(&self, seed: &str) -> Result<String> {
        let writer = SentenceGenerator::new(self.iterations, self.depth);
        let keywords = keywords_from_text(seed, self.sentence_count.max(3));
        let mut sentences: Vec<String> = Vec::with_capacity(self.sentence_count);
        for slot in 0..self.sentence_count {
            let sentence = match keywords.get(slot) {
                Some(keyword) if slot > 0 => {
                    writer.generate_variant(&format!("{} {}", seed, keyword), slot)?
                }
                _ => writer.generate_variant(seed, slot)?,
            };
            sentences.push(sentence);
        }
        Ok(sentences.join(" "))
    }
}
/// Builds a multi-paragraph essay (title, introduction, body paragraphs and
/// conclusion) from a topic string.
pub struct EssayGenerator {
/// Upper bound on the number of body paragraphs; `generate` writes one body
/// paragraph per extracted topic keyword, up to this many. The introduction
/// and conclusion are added on top.
pub paragraph_count: usize,
/// Number of sentences in each paragraph.
pub sentence_count: usize,
/// Forwarded to the sentence and paragraph generators.
pub iterations: usize,
/// Forwarded to the sentence and paragraph generators.
pub depth: usize,
}
impl EssayGenerator {
    /// Creates an essay generator; all four values are stored unchanged.
    pub fn new(
        paragraph_count: usize,
        sentence_count: usize,
        iterations: usize,
        depth: usize,
    ) -> Self {
        Self {
            paragraph_count,
            sentence_count,
            iterations,
            depth,
        }
    }

    /// Generates an essay about `topic`.
    ///
    /// * The title is the first seven words of `topic`, each capitalized.
    /// * The introduction is a paragraph seeded with
    ///   `"<topic> fundamental truth"`.
    /// * One body paragraph is written per topic keyword, for at most
    ///   `paragraph_count` keywords; a topic that yields fewer keywords yields
    ///   fewer body paragraphs.
    /// * The conclusion is a paragraph seeded with
    ///   `"<topic> coherence understanding"`.
    ///
    /// # Errors
    ///
    /// Propagates any error from the sentence and paragraph generators.
    pub fn generate(&self, topic: &str) -> Result<EssayOutput> {
        let title_words: Vec<String> = topic
            .split_whitespace()
            .take(7)
            .map(capitalize)
            .collect();
        let title = title_words.join(" ");

        let keywords = keywords_from_text(topic, self.paragraph_count + 2);
        let para_gen = ParagraphGenerator::new(self.sentence_count, self.iterations, self.depth);
        let single_gen = SentenceGenerator::new(self.iterations, self.depth);

        let mut paragraphs: Vec<String> = Vec::new();
        paragraphs.push(para_gen.generate(&format!("{} fundamental truth", topic))?);

        for (i, kw) in keywords.iter().take(self.paragraph_count).enumerate() {
            let body_seed = format!("{} {}", topic, kw);
            // Body paragraph `i` uses the variants starting at this number, so
            // consecutive paragraphs cycle through different templates.
            let first_variant = i * self.sentence_count + 4;
            let body_sentences = (0..self.sentence_count)
                .map(|j| single_gen.generate_variant(&body_seed, first_variant + j))
                .collect::<Result<Vec<String>>>()?;
            paragraphs.push(body_sentences.join(" "));
        }

        paragraphs.push(para_gen.generate(&format!("{} coherence understanding", topic))?);
        Ok(EssayOutput { title, paragraphs })
    }
}
/// Result of `EssayGenerator::generate`.
pub struct EssayOutput {
/// Up to the first seven words of the topic, each with its first letter
/// uppercased, joined by spaces.
pub title: String,
/// Paragraphs in reading order: introduction, body paragraphs, conclusion.
pub paragraphs: Vec<String>,
}