use anyhow::Result;
use crate::config::{ConsolidatorConfig, ExtractiveConsolidatorConfig};
/// Borrowed view of a single episode handed to a [`Consolidator`].
///
/// The extractive consolidator in this file reads only `text`; the other
/// fields travel alongside it untouched here.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct EpisodeInput<'a> {
    /// Raw episode text to consolidate.
    pub text: &'a str,
    /// Frame sequence number (presumably the frame this episode is tied to — TODO confirm).
    pub frame_seq: u64,
    /// Identifier of the session the episode came from (assumed from the name — TODO confirm).
    pub session_id: &'a str,
    /// Episode start timestamp (unit and epoch are not established in this file — TODO confirm).
    pub episode_start_ts: f64,
    /// Episode end timestamp (unit and epoch are not established in this file — TODO confirm).
    pub episode_end_ts: f64,
}
/// A subject–predicate–object fact with optional supporting metadata.
///
/// `PartialEq` is derived so facts can be compared directly (for example with
/// `assert_eq!`); `Eq` is not available because `confidence` is an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct FactTriple {
    /// Subject of the fact.
    pub subject: String,
    /// Predicate relating `subject` to `object`.
    pub predicate: String,
    /// Object of the fact.
    pub object: String,
    /// Optional text backing the fact (presumably a span of the source episode — TODO confirm).
    pub support_span: Option<String>,
    /// Optional confidence value (range and scale are not established in this file — TODO confirm).
    pub confidence: Option<f64>,
}
/// Result of consolidating one episode.
///
/// The `Default` value is an empty summary with no facts.
#[derive(Clone, Debug, Default)]
pub struct ConsolidationOutput {
    /// Summary text produced for the episode.
    pub summary: String,
    /// Facts extracted from the episode; empty when none were extracted.
    pub facts: Vec<FactTriple>,
}
/// Strategy that turns an episode into a [`ConsolidationOutput`].
///
/// Implementations must be `Send + Sync`, so a boxed consolidator can be
/// shared across threads.
pub trait Consolidator: Send + Sync {
    /// Consolidates `episode` into a summary and a list of facts.
    ///
    /// # Errors
    ///
    /// Failure conditions are defined by each implementation.
    fn consolidate(&self, episode: &EpisodeInput<'_>) -> Result<ConsolidationOutput>;

    /// Returns a static identifier for this implementation
    /// (for example `"extractive"`).
    fn mode(&self) -> &'static str;
}
/// Stateless consolidator that builds its summary by selecting sentences
/// from the episode text; it holds no configuration or other data.
#[derive(Debug, Clone)]
pub struct ExtractiveConsolidator;
impl ExtractiveConsolidator {
pub fn new(_cfg: ExtractiveConsolidatorConfig) -> Self {
Self
}
}
impl Consolidator for ExtractiveConsolidator {
    /// Builds an extractive summary of `episode.text`; the fact list is always empty.
    ///
    /// Leading `[...]` tags are stripped from each line and the result is split
    /// into sentences. With three or fewer sentences, the whole cleaned text
    /// (trimmed) becomes the summary. Otherwise the three sentences with the
    /// most whitespace-separated words are kept — earlier sentences win ties —
    /// and joined with single spaces in their original order.
    ///
    /// This implementation always returns `Ok`.
    fn consolidate(&self, episode: &EpisodeInput<'_>) -> Result<ConsolidationOutput> {
        let cleaned = strip_role_tags(episode.text);
        let sentences = split_sentences(&cleaned);

        let summary = if sentences.len() > 3 {
            let word_counts: Vec<usize> = sentences
                .iter()
                .map(|sentence| sentence.split_whitespace().count())
                .collect();

            // Rank sentence indices: most words first, lower index first on ties.
            let mut picked: Vec<usize> = (0..sentences.len()).collect();
            picked.sort_by_key(|&idx| (std::cmp::Reverse(word_counts[idx]), idx));
            picked.truncate(3);

            // Put the winners back into reading order before joining.
            picked.sort_unstable();
            picked
                .iter()
                .map(|&idx| sentences[idx].as_str())
                .collect::<Vec<_>>()
                .join(" ")
        } else {
            cleaned.trim().to_string()
        };

        Ok(ConsolidationOutput {
            summary,
            facts: Vec::new(),
        })
    }

    /// Identifies this implementation as `"extractive"`.
    fn mode(&self) -> &'static str {
        "extractive"
    }
}
/// Removes one leading `[...]` tag from every line of `text`.
///
/// A line is rewritten only when, after skipping leading whitespace, it starts
/// with `[` and contains a `]`; everything up to and including the first `]`
/// is dropped, as is any whitespace that follows it. All other lines are kept
/// verbatim, including their leading whitespace. Lines are re-joined with
/// `\n`, so a trailing newline or `\r\n` endings are not preserved.
fn strip_role_tags(text: &str) -> String {
    let mut cleaned_lines: Vec<String> = Vec::new();
    for line in text.lines() {
        let untagged = line
            .trim_start()
            .strip_prefix('[')
            .and_then(|after_open| after_open.split_once(']'))
            .map(|(_tag, rest)| rest.trim_start());
        // Fall back to the untouched line when no complete tag was found.
        cleaned_lines.push(untagged.unwrap_or(line).to_string());
    }
    cleaned_lines.join("\n")
}
/// Splits `text` into trimmed, non-empty sentences.
///
/// A sentence ends at `.`, `!` or `?` when that character is followed by
/// whitespace or is the last character of the input, so `3.14` and the first
/// two dots of `...` do not end a sentence. Any trailing text without
/// terminal punctuation becomes a final sentence.
fn split_sentences(text: &str) -> Vec<String> {
    let mut sentences: Vec<String> = Vec::new();
    // Byte offset where the sentence currently being scanned begins.
    let mut start = 0usize;
    let mut cursor = text.char_indices().peekable();

    while let Some((offset, ch)) = cursor.next() {
        if !matches!(ch, '.' | '!' | '?') {
            continue;
        }
        let at_boundary = match cursor.peek() {
            Some(&(_, following)) => following.is_whitespace(),
            None => true,
        };
        if !at_boundary {
            continue;
        }
        // `end` is just past the punctuation, which is always a char boundary.
        let end = offset + ch.len_utf8();
        let piece = text[start..end].trim();
        if !piece.is_empty() {
            sentences.push(piece.to_string());
        }
        start = end;
    }

    let remainder = text[start..].trim();
    if !remainder.is_empty() {
        sentences.push(remainder.to_string());
    }
    sentences
}
/// Constructs the boxed [`Consolidator`] selected by `cfg`.
///
/// The match is deliberately exhaustive with no catch-all arm, so adding a
/// new `ConsolidatorConfig` variant forces this function to be updated.
pub fn build_consolidator(cfg: &ConsolidatorConfig) -> Box<dyn Consolidator> {
    match cfg {
        ConsolidatorConfig::Extractive(extractive_cfg) => {
            // `new` takes the config by value, so the borrowed config is cloned.
            let consolidator = ExtractiveConsolidator::new(extractive_cfg.clone());
            Box::new(consolidator)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `text` in an `EpisodeInput` whose other fields are fixed placeholders.
    fn ep(text: &str) -> EpisodeInput<'_> {
        EpisodeInput {
            text,
            frame_seq: 0,
            session_id: "s1",
            episode_start_ts: 0.0,
            episode_end_ts: 0.0,
        }
    }

    /// Runs the extractive consolidator over `text`, panicking if it fails.
    fn summarize(text: &str) -> ConsolidationOutput {
        ExtractiveConsolidator.consolidate(&ep(text)).unwrap()
    }

    /// Short input yields a summary without the role tag and no facts.
    #[test]
    fn extractive_emits_summary_no_facts_on_short_text() {
        let result = summarize("[user] We use Redis for the queue.");
        assert!(result.facts.is_empty());
        assert!(result.summary.contains("Redis"));
        assert!(!result.summary.starts_with("[user]"));
    }

    /// Two runs over the same text must agree.
    #[test]
    fn extractive_is_deterministic_on_same_input() {
        let text = "[user] One. Two longer sentence here. Three. Four also longer.";
        let first = summarize(text);
        let second = summarize(text);
        assert_eq!(first.summary, second.summary);
        assert_eq!(first.facts.len(), second.facts.len());
    }

    /// A role tag preceded by whitespace is still removed.
    #[test]
    fn extractive_strips_role_tags_with_leading_whitespace() {
        let result = summarize(" [user] hello there");
        assert!(!result.summary.contains("[user]"));
        assert!(result.summary.contains("hello"));
    }

    /// With more than three sentences, only the wordiest three survive.
    #[test]
    fn extractive_long_text_selects_top_sentences() {
        let text = "short. \
                    A longer sentence with more words present here. \
                    tiny. \
                    Another reasonably long sentence to consider. \
                    The longest sentence in this set goes here with quite a few words.";
        let result = summarize(text);
        assert!(result.summary.contains("longest"));
        let period_count = result.summary.chars().filter(|&ch| ch == '.').count();
        assert!(
            period_count <= 3,
            "expected <=3 sentences; got: {:?}",
            result.summary
        );
    }

    /// The mode string is the literal `"extractive"`.
    #[test]
    fn mode_is_extractive_for_pgrg_extractor_column() {
        assert_eq!(ExtractiveConsolidator.mode(), "extractive");
    }

    /// The factory returns a working extractive consolidator for the extractive config.
    #[test]
    fn build_consolidator_dispatches_extractive() {
        let config = ConsolidatorConfig::Extractive(ExtractiveConsolidatorConfig {});
        let consolidator = build_consolidator(&config);
        let result = consolidator.consolidate(&ep("[user] hi.")).unwrap();
        assert!(result.facts.is_empty());
        assert_eq!(consolidator.mode(), "extractive");
    }
}