use crate::graph::MemNode;
use car_ir::json_extract::extract_json_object;
use car_ir::linalg::cosine_similarity;
use petgraph::stable_graph::NodeIndex;
use serde::{Deserialize, Serialize};
/// Output of one compaction pass: a condensed summary plus the atomic
/// facts that are worth keeping as standalone statements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionResult {
    /// Condensed paragraph covering the compacted conversation turns.
    pub summary: String,
    /// Self-contained fact statements extracted during compaction.
    pub key_facts: Vec<String>,
}
/// Counters describing a single conversation-compaction run; consumed by
/// `emit_telemetry` for structured logging.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ConversationCompactionReport {
    /// Number of conversation turns folded into summaries.
    pub turns_summarized: usize,
    /// Number of summary nodes created from those turns.
    pub summaries_created: usize,
    /// Number of key facts extracted during the run.
    pub facts_extracted: usize,
    /// Number of summaries promoted (presumably to a higher memory
    /// layer — confirm against the caller).
    pub summaries_promoted: usize,
    /// Number of turns dropped as redundant against existing facts.
    pub redundant_dropped: usize,
    /// Token count before compaction (0 if unknown).
    pub tokens_before: usize,
    /// Token count after compaction.
    pub tokens_after: usize,
}
impl ConversationCompactionReport {
    /// Emit a structured `tracing` info event for this report, tagged
    /// with the memory `layer` it describes.
    ///
    /// Stays silent when the run neither summarized any turns nor
    /// dropped any redundant ones, so no-op compactions do not spam
    /// the logs.
    pub fn emit_telemetry(&self, layer: &str) {
        let nothing_happened = self.turns_summarized == 0 && self.redundant_dropped == 0;
        if nothing_happened {
            return;
        }
        // Fraction of tokens kept; 1.0 when there was nothing to compress.
        let compression_ratio = match self.tokens_before {
            0 => 1.0,
            before => self.tokens_after as f64 / before as f64,
        };
        tracing::info!(
            layer = %layer,
            tokens_before = self.tokens_before,
            tokens_after = self.tokens_after,
            turns_summarized = self.turns_summarized,
            summaries_created = self.summaries_created,
            facts_extracted = self.facts_extracted,
            summaries_promoted = self.summaries_promoted,
            redundant_dropped = self.redundant_dropped,
            compression_ratio = format_args!("{:.2}", compression_ratio),
            "conversation compaction completed"
        );
    }
}
/// Importance assessment for a single conversation turn in the memory
/// graph, as produced by the heuristics in this module.
#[derive(Debug, Clone)]
pub struct TurnImportance {
    /// Graph node index of the assessed turn.
    pub nix: NodeIndex,
    /// Combined importance score (content + graph heuristics).
    pub score: f32,
    /// Whether the turn was judged redundant against existing facts.
    pub redundant: bool,
}
/// Keywords signalling that a turn contains a decision, commitment, or
/// other durable piece of information. Matched as lowercase substrings.
const DECISION_KEYWORDS: &[&str] = &[
    "decided", "agreed", "will", "should", "must", "need to", "budget",
    "deadline", "plan", "commit", "approve", "reject", "choose", "selected",
    "confirmed", "assigned", "action item",
];
/// Subset of `DECISION_KEYWORDS` marking firm outcomes; sentences
/// containing these are promoted to standalone key facts.
const STRONG_DECISION_KEYWORDS: &[&str] =
    &["decided", "agreed", "confirmed", "approved", "selected", "assigned"];
/// Heuristic importance of a turn's text, in `[0.0, 1.0]`.
///
/// Signals: decision keywords (0.15 each, capped at 0.5), a question
/// mark (+0.1), any ASCII digit (+0.1), and length over 20 words
/// (+0.1). The total is clamped to 1.0.
pub fn content_importance(text: &str) -> f32 {
    let lowered = text.to_lowercase();
    let keyword_hits = DECISION_KEYWORDS
        .iter()
        .filter(|kw| lowered.contains(*kw))
        .count() as f32;
    // Decision language dominates, but is capped so one verbose turn
    // cannot saturate the score on keywords alone.
    let mut importance = (keyword_hits * 0.15).min(0.5);
    if lowered.contains('?') {
        importance += 0.1; // open questions are worth keeping around
    }
    if text.bytes().any(|b| b.is_ascii_digit()) {
        importance += 0.1; // digits usually mean concrete data (dates, amounts)
    }
    if text.split_whitespace().count() > 20 {
        importance += 0.1; // longer turns tend to carry more substance
    }
    importance.min(1.0)
}
/// Importance contribution from a turn's connectivity in the memory
/// graph, in `[0.0, 0.5]`.
///
/// Counts outgoing plus incoming edges, discounts two of them
/// (presumably the prev/next temporal links — TODO confirm against the
/// graph builder), and scores each remaining edge at 0.2.
pub fn graph_importance(nix: NodeIndex, graph: &crate::graph::MemoryGraph) -> f32 {
    let outgoing = graph.inner.edges(nix).count();
    let incoming = graph
        .inner
        .edges_directed(nix, petgraph::Direction::Incoming)
        .count();
    // saturating_sub keeps isolated nodes at zero instead of underflowing.
    let beyond_temporal = (outgoing + incoming).saturating_sub(2);
    (beyond_temporal as f32 * 0.2).min(0.5)
}
/// Whether a turn is redundant: its embedding is more similar than
/// `threshold` to at least one existing fact embedding.
///
/// A missing or empty turn embedding is never considered redundant —
/// without signal we err on the side of keeping the turn.
pub fn is_redundant(
    turn_embedding: Option<&[f32]>,
    fact_embeddings: &[&[f32]],
    threshold: f32,
) -> bool {
    let Some(turn_emb) = turn_embedding.filter(|e| !e.is_empty()) else {
        return false;
    };
    fact_embeddings
        .iter()
        .any(|fact_emb| cosine_similarity(turn_emb, fact_emb) > threshold)
}
/// Greedy single-pass clustering of turns by embedding similarity.
///
/// Each turn joins the most similar existing cluster whose centroid
/// similarity exceeds `similarity_threshold` and which still has room
/// (`max_cluster_size`); otherwise it seeds a new cluster. Turns with
/// a missing/empty embedding piggyback on the most recent cluster when
/// it has room, else start their own centroid-less cluster. If NO turn
/// has an embedding at all, falls back to fixed-size chunks in input
/// order. Returns the member lists, one `Vec<NodeIndex>` per cluster.
pub fn cluster_by_topic(
    turns: &[(NodeIndex, Option<&[f32]>)],
    similarity_threshold: f32,
    max_cluster_size: usize,
) -> Vec<Vec<NodeIndex>> {
    let has_embeddings = turns.iter().any(|(_, emb)| emb.is_some());
    if !has_embeddings {
        // No similarity signal at all: degrade to positional chunking.
        return turns
            .chunks(max_cluster_size)
            .map(|chunk| chunk.iter().map(|(nix, _)| *nix).collect())
            .collect();
    }
    // Each cluster keeps a running-mean centroid alongside its members.
    let mut clusters: Vec<(Vec<f32>, Vec<NodeIndex>)> = Vec::new();
    for &(nix, emb_opt) in turns {
        let emb = match emb_opt {
            Some(e) if !e.is_empty() => e,
            _ => {
                // No embedding: attach to the latest cluster if it has
                // room, preserving rough conversational order.
                if let Some(last) = clusters.last_mut() {
                    if last.1.len() < max_cluster_size {
                        last.1.push(nix);
                        continue;
                    }
                }
                // Centroid-less cluster; it can never attract embedded
                // turns because empty centroids are skipped below.
                clusters.push((Vec::new(), vec![nix]));
                continue;
            }
        };
        // Find the best-matching open cluster above the threshold.
        let mut best_cluster = None;
        let mut best_sim = similarity_threshold;
        for (i, (centroid, members)) in clusters.iter().enumerate() {
            if members.len() >= max_cluster_size {
                continue;
            }
            if centroid.is_empty() {
                continue;
            }
            let sim = cosine_similarity(emb, centroid);
            if sim > best_sim {
                best_sim = sim;
                best_cluster = Some(i);
            }
        }
        if let Some(idx) = best_cluster {
            let (centroid, members) = &mut clusters[idx];
            // Incremental mean update. NOTE(review): `n` counts ALL
            // members, including embedding-less piggybacked turns, so
            // the centroid is weighted as if those had embeddings too.
            let n = members.len() as f32;
            for (i, &val) in emb.iter().enumerate() {
                // Guard against dimension mismatch: extra dimensions of
                // the new embedding are ignored rather than panicking.
                if i < centroid.len() {
                    centroid[i] = (centroid[i] * n + val) / (n + 1.0);
                }
            }
            members.push(nix);
        } else {
            // Nothing similar enough: this turn seeds a new cluster.
            clusters.push((emb.to_vec(), vec![nix]));
        }
    }
    clusters.into_iter().map(|(_, members)| members).collect()
}
/// Build the LLM prompt asking for a summary of `turns`, requesting a
/// JSON object that deserializes into [`CompactionResult`].
pub fn summarize_conversation_prompt(turns: &[&MemNode]) -> String {
    let joined_turns = turns
        .iter()
        .map(|node| node.value.as_str())
        .collect::<Vec<_>>()
        .join("\n");
    format!(
        r#"Summarize the following conversation turns into a concise paragraph that preserves:
1. Key decisions made
2. Important facts stated
3. Action items or commitments
4. Questions that remain open
## Conversation
{turns}
Respond with ONLY a JSON object:
```json
{{
"summary": "Concise paragraph summary preserving key information",
"key_facts": ["fact1", "fact2"]
}}
```"#,
        turns = joined_turns,
    )
}
/// Build the LLM prompt that distills `summaries` into atomic facts,
/// requesting the same JSON shape as [`CompactionResult`].
pub fn extract_key_facts_prompt(summaries: &[&str]) -> String {
    // Summaries are separated by a visual divider so the model treats
    // each one as a distinct source document.
    let joined_summaries = summaries.join("\n---\n");
    format!(
        r#"Extract only the essential, atomic facts from these conversation summaries.
Each fact should be a single, self-contained statement.
## Summaries
{summaries}
Respond with ONLY a JSON object:
```json
{{
"summary": "One-sentence combined summary",
"key_facts": ["atomic fact 1", "atomic fact 2"]
}}
```"#,
        summaries = joined_summaries,
    )
}
/// Parse an LLM response into a [`CompactionResult`].
///
/// First tries to pull a JSON object out of the response and
/// deserialize it; if either step fails, falls back to treating the
/// whole trimmed response as the summary with no key facts, so a
/// non-JSON reply is never lost.
pub fn parse_compaction_result(response: &str) -> CompactionResult {
    extract_json_object(response)
        .and_then(|json| serde_json::from_str::<CompactionResult>(&json).ok())
        .unwrap_or_else(|| CompactionResult {
            summary: response.trim().to_string(),
            key_facts: Vec::new(),
        })
}
/// LLM-free fallback summarizer.
///
/// Keeps the first and last turn verbatim and collapses the middle
/// turns into a placeholder. When middle sentences contain decision
/// keywords they are carried into the placeholder, and sentences with
/// strong decision keywords additionally become key facts.
pub fn heuristic_summarize(turns: &[&MemNode]) -> CompactionResult {
    if turns.is_empty() {
        return CompactionResult {
            summary: String::new(),
            key_facts: Vec::new(),
        };
    }
    // Two turns or fewer: concatenation is already compact enough.
    if turns.len() <= 2 {
        let joined = turns
            .iter()
            .map(|n| n.value.as_str())
            .collect::<Vec<_>>()
            .join(" ");
        return CompactionResult {
            summary: joined,
            key_facts: Vec::new(),
        };
    }
    let first_turn = &turns[0].value;
    let last_turn = &turns[turns.len() - 1].value;
    let middle = &turns[1..turns.len() - 1];
    let mut key_sentences: Vec<String> = Vec::new();
    let mut key_facts: Vec<String> = Vec::new();
    for node in middle {
        // Naive sentence split on terminal punctuation; re-suffixed
        // sentences always end with '.' regardless of the original mark.
        for raw in node.value.split(&['.', '!', '?'][..]) {
            let sentence = raw.trim();
            if sentence.is_empty() {
                continue;
            }
            let lowered = sentence.to_lowercase();
            if !DECISION_KEYWORDS.iter().any(|kw| lowered.contains(kw)) {
                continue;
            }
            key_sentences.push(format!("{}.", sentence));
            // Firm outcomes are worth keeping as standalone facts.
            if STRONG_DECISION_KEYWORDS.iter().any(|kw| lowered.contains(kw)) {
                key_facts.push(format!("{}.", sentence));
            }
        }
    }
    let summary = if key_sentences.is_empty() {
        format!(
            "{} [...{} turns summarized...] {}",
            first_turn,
            middle.len(),
            last_turn
        )
    } else {
        format!(
            "{} [...{} turns summarized: {}...] {}",
            first_turn,
            middle.len(),
            key_sentences.join(" "),
            last_turn,
        )
    };
    CompactionResult { summary, key_facts }
}
/// LLM-free fact extraction from a summary.
///
/// Splits on terminal punctuation and keeps each sentence that is at
/// least 10 characters, contains a fact-indicator word, and is not a
/// "[..." placeholder produced by `heuristic_summarize`. Kept
/// sentences are re-terminated with '.'.
pub fn heuristic_extract_facts(summary_text: &str) -> Vec<String> {
    // Verbs that typically anchor a declarative, self-contained fact.
    const FACT_INDICATORS: &[&str] = &[
        "decided", "agreed", "confirmed", "is", "are", "was", "were", "must",
        "will", "should", "need",
    ];
    summary_text
        .split(&['.', '!', '?'][..])
        .filter_map(|raw| {
            let sentence = raw.trim();
            // Very short fragments are almost never complete facts.
            if sentence.len() < 10 {
                return None;
            }
            let lowered = sentence.to_lowercase();
            let indicated = FACT_INDICATORS.iter().any(|kw| lowered.contains(*kw));
            (indicated && !lowered.starts_with("[..."))
                .then(|| format!("{}.", sentence))
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::{ContentType, FactMetadata, MemKind};
    use chrono::Utc;
    /// Build a minimal layer-3 conversation node carrying `text`.
    fn conv_node(text: &str) -> MemNode {
        MemNode {
            kind: MemKind::Conversation,
            layer: 3,
            key: "user".to_string(),
            value: text.to_string(),
            fact_id: None,
            scope: "global".to_string(),
            authority: "peer".to_string(),
            is_constraint: false,
            created_at: Utc::now(),
            expires_at: None,
            content_type: ContentType::NaturalLanguage,
            metadata: FactMetadata::default(),
        }
    }
    #[test]
    fn content_importance_scores_decisions() {
        // Decision keywords score above zero; small talk scores zero.
        assert!(content_importance("We decided to use PostgreSQL") > 0.0);
        assert!(content_importance("hello world") == 0.0);
        // More signals (keywords + digits) rank strictly higher.
        assert!(
            content_importance("We agreed on the budget of $500K")
                > content_importance("Ok sounds good")
        );
    }
    #[test]
    fn redundancy_detection() {
        // Identical vectors: similarity 1.0 exceeds the threshold.
        let turn = [0.6f32, 0.8];
        let fact = [0.6f32, 0.8];
        assert!(is_redundant(Some(&turn), &[&fact], 0.85));
        // Orthogonal vectors: similarity 0.0, not redundant.
        let turn2 = [1.0f32, 0.0];
        let fact2 = [0.0f32, 1.0];
        assert!(!is_redundant(Some(&turn2), &[&fact2], 0.85));
        // Missing embedding is never redundant.
        assert!(!is_redundant(None, &[&fact], 0.85));
    }
    #[test]
    fn cluster_without_embeddings_falls_back_to_chunks() {
        // 10 embedding-less turns with max size 4 → chunks of 4/4/2.
        let turns: Vec<(NodeIndex, Option<&[f32]>)> =
            (0..10).map(|i| (NodeIndex::new(i), None)).collect();
        let clusters = cluster_by_topic(&turns, 0.7, 4);
        assert_eq!(clusters.len(), 3);
        assert_eq!(clusters[0].len(), 4);
        assert_eq!(clusters[1].len(), 4);
        assert_eq!(clusters[2].len(), 2);
    }
    #[test]
    fn cluster_with_embeddings_groups_similar() {
        // Two near-identical pairs, interleaved in input order, should
        // still resolve into exactly two topic clusters.
        let emb_a1 = [1.0f32, 0.0, 0.0];
        let emb_a2 = [0.95, 0.05, 0.0];
        let emb_b1 = [0.0, 1.0, 0.0];
        let emb_b2 = [0.0, 0.95, 0.05];
        let turns = vec![
            (NodeIndex::new(0), Some(emb_a1.as_slice())),
            (NodeIndex::new(1), Some(emb_b1.as_slice())),
            (NodeIndex::new(2), Some(emb_a2.as_slice())),
            (NodeIndex::new(3), Some(emb_b2.as_slice())),
        ];
        let clusters = cluster_by_topic(&turns, 0.7, 4);
        assert_eq!(clusters.len(), 2);
    }
    #[test]
    fn heuristic_summarize_basic() {
        // First/last turns survive verbatim; middle is collapsed into
        // the "[...N turns summarized...]" placeholder.
        let nodes: Vec<MemNode> = (0..6).map(|i| conv_node(&format!("Turn {}", i))).collect();
        let refs: Vec<&MemNode> = nodes.iter().collect();
        let result = heuristic_summarize(&refs);
        assert!(!result.summary.is_empty());
        assert!(result.summary.contains("Turn 0"));
        assert!(result.summary.contains("Turn 5"));
        assert!(result.summary.contains("summarized"));
    }
    #[test]
    fn heuristic_summarize_preserves_decisions() {
        // Middle turns with decision keywords must leak into the
        // summary, and strong keywords must produce key facts.
        let nodes = vec![
            conv_node("user: Let's discuss the database."),
            conv_node("user: We decided to use PostgreSQL."),
            conv_node("assistant: I agreed. PostgreSQL it is."),
            conv_node("user: Now let's move on."),
        ];
        let refs: Vec<&MemNode> = nodes.iter().collect();
        let result = heuristic_summarize(&refs);
        assert!(result.summary.contains("decided") || result.summary.contains("PostgreSQL"));
        assert!(!result.key_facts.is_empty());
    }
    #[test]
    fn heuristic_summarize_short_input() {
        // Two turns or fewer are joined verbatim, no placeholder.
        let nodes = vec![conv_node("Hello"), conv_node("World")];
        let refs: Vec<&MemNode> = nodes.iter().collect();
        let result = heuristic_summarize(&refs);
        assert!(result.summary.contains("Hello"));
        assert!(result.summary.contains("World"));
    }
    #[test]
    fn heuristic_extract_facts_basic() {
        let text = "We decided to use Rust. The deadline is Friday. Hello world.";
        let facts = heuristic_extract_facts(text);
        assert!(facts.iter().any(|f| f.contains("decided")));
        assert!(facts.iter().any(|f| f.contains("deadline")));
    }
    #[test]
    fn parse_compaction_result_json() {
        // A fenced JSON object is extracted and deserialized.
        let response = r#"```json
{"summary": "They chose PostgreSQL.", "key_facts": ["Database is PostgreSQL"]}
```"#;
        let result = parse_compaction_result(response);
        assert_eq!(result.summary, "They chose PostgreSQL.");
        assert_eq!(result.key_facts, vec!["Database is PostgreSQL"]);
    }
    #[test]
    fn parse_compaction_result_fallback() {
        // Non-JSON responses become the summary verbatim (trimmed).
        let response = "Just a plain text summary.";
        let result = parse_compaction_result(response);
        assert_eq!(result.summary, "Just a plain text summary.");
        assert!(result.key_facts.is_empty());
    }
    #[test]
    fn summarize_prompt_includes_all_turns() {
        let nodes = vec![conv_node("Turn A"), conv_node("Turn B")];
        let refs: Vec<&MemNode> = nodes.iter().collect();
        let prompt = summarize_conversation_prompt(&refs);
        assert!(prompt.contains("Turn A"));
        assert!(prompt.contains("Turn B"));
        assert!(prompt.contains("JSON"));
    }
}