roboticus-agent 0.11.4

//! Context compaction — minimize the token footprint of all injected context.
//!
//! Every piece of context (memories, recent activity, checkpoints, hippocampus
//! summaries, topic summaries) flows through this module before entering the
//! token budget. The compactor:
//!
//! 1. **Deduplicates** — merges overlapping entries (same fact from different
//!    tiers, or the same memory appearing in both ambient and similarity results)
//! 2. **Compresses** — strips verbose formatting, collapses multi-line entries
//!    into single-line bullets, removes low-information-density content
//! 3. **Prioritizes** — ranks entries by a weighted blend of recency, relevance,
//!    and importance, and drops the lowest-value entries first when the budget
//!    is exceeded
//! 4. **Enforces ceiling** — hard token cap ensures memory never starves system
//!    prompt or conversation history
//!
//! ## Architecture position
//!
//! ```text
//! Retrieval → Compaction → Context Assembly → Inference
//!     ↑            ↑              ↑
//!  raw memories   compressed    token-budgeted
//!  (verbose)      (minimal)     (final context)
//! ```

use crate::context::estimate_tokens;

/// A single piece of context to be compacted.
///
/// `content` is the only field whose text reaches the compacted output;
/// `source` selects the section header the entry is grouped under, and the
/// three numeric fields are combined by `compute_priority` to rank the entry.
#[derive(Debug, Clone)]
pub struct ContextEntry {
    /// Source tier: "working", "episodic", "semantic", "procedural",
    /// "relationship", "ambient", "checkpoint", "hippocampus", "topic_summary"
    ///
    /// Any other value is accepted and rendered verbatim as `[value]`.
    pub source: String,
    /// The raw content text.
    pub content: String,
    /// Importance score (higher = more important to retain). 0-10 scale.
    ///
    /// Divided by 10 before weighting; out-of-range values are not clamped.
    pub importance: f32,
    /// Recency: seconds since creation. Lower = more recent = higher priority.
    ///
    /// Fed into an exponential decay with a roughly one-hour half-life.
    pub age_seconds: u64,
    /// Relevance: cosine similarity to the current query (0.0-1.0).
    ///
    /// Values of about 0.1 or less are treated as "no relevance signal": the
    /// priority then depends on recency and importance only.
    pub relevance: f32,
}

/// Compacted output ready for context assembly.
///
/// Produced by `compact`; all fields are plain values with no invariants
/// enforced by the type itself.
#[derive(Debug, Clone)]
pub struct CompactedContext {
    /// The compacted memory text block to inject.
    ///
    /// Section headers followed by `- ` bullets, with trailing whitespace
    /// removed. Empty when there was nothing to emit.
    pub text: String,
    /// Number of entries retained after compaction.
    pub entries_retained: usize,
    /// Number of entries dropped during compaction.
    ///
    /// Computed as input length minus `entries_retained`, so it counts both
    /// entries removed as duplicates and entries cut by the token budget.
    pub entries_dropped: usize,
    /// Estimated token count of the compacted output.
    ///
    /// Estimated on the rendered output (headers and bullet markers included),
    /// so it can be somewhat higher than the sum of the entry texts that was
    /// checked against the budget.
    pub tokens: usize,
}

/// Configuration for the compaction pass.
///
/// See the `Default` impl for the stock values.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Hard ceiling in tokens for the total compacted output.
    ///
    /// Checked against the running sum of estimated tokens of the entry texts;
    /// section headers and bullet markers are not part of that sum.
    pub max_tokens: usize,
    /// Maximum characters per individual entry after compression.
    ///
    /// Longer entries are cut and suffixed with `...`, so the final string can
    /// be up to three characters longer than this value.
    pub max_entry_chars: usize,
    /// Similarity threshold for deduplication (0.0-1.0). Entries with
    /// text overlap above this threshold are merged.
    ///
    /// Overlap is the Jaccard similarity of word trigrams; the comparison is
    /// strict (`>`), and the later of two overlapping entries is discarded.
    pub dedup_threshold: f64,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            max_tokens: 2000,
            max_entry_chars: 200,
            dedup_threshold: 0.8,
        }
    }
}

/// Compact a set of context entries into a minimal-footprint text block.
///
/// This is the single entry point for all context compaction. Every memory
/// section should be converted to `ContextEntry` structs and passed through
/// this function before entering the token budget.
///
/// Pipeline:
///
/// 1. Each entry is compressed (`compress_entry`) and scored
///    (`compute_priority`).
/// 2. Entries are ordered by score, highest first. A NaN score (possible when
///    an input field is NaN) is ranked last.
/// 3. Near-duplicates are removed. Because the list is already ordered, the
///    survivor of each duplicate group is the highest-priority one.
/// 4. Entries are admitted in priority order until the next one would push the
///    sum of entry-text tokens past `config.max_tokens`; admission stops there.
/// 5. Admitted entries are rendered as `- ` bullets under section headers.
///    Sources that share a header (e.g. "episodic" and "semantic") are rendered
///    under a single header.
///
/// The budget in step 4 counts entry text only, so the reported `tokens`
/// (estimated on the final rendered text, headers and bullets included) can be
/// somewhat above `config.max_tokens`.
///
/// Returns an all-zero, empty `CompactedContext` for empty input.
pub fn compact(entries: &[ContextEntry], config: &CompactionConfig) -> CompactedContext {
    if entries.is_empty() {
        return CompactedContext {
            text: String::new(),
            entries_retained: 0,
            entries_dropped: 0,
            tokens: 0,
        };
    }

    // Phase 1: Compress and score each entry.
    let mut compressed: Vec<(f64, String, &str)> = entries
        .iter()
        .map(|e| {
            let text = compress_entry(&e.content, config.max_entry_chars);
            let priority = compute_priority(e);
            // A NaN score must not outrank real scores under `total_cmp`.
            let priority = if priority.is_nan() {
                f64::NEG_INFINITY
            } else {
                priority
            };
            (priority, text, e.source.as_str())
        })
        .collect();

    // Phase 2: Sort by priority (highest first). `total_cmp` is a genuine total
    // order, which `sort_by` requires; the sort is stable, so ties keep their
    // input order.
    compressed.sort_by(|a, b| b.0.total_cmp(&a.0));

    // Phase 3: Deduplicate by text similarity. Running this after the sort is
    // what makes the highest-priority copy of a duplicate the one that is kept.
    deduplicate(&mut compressed, config.dedup_threshold);

    // Phase 4: Admit entries within the token budget, bucketed by source.
    let mut by_source: std::collections::BTreeMap<&str, Vec<&str>> =
        std::collections::BTreeMap::new();
    let mut retained = 0usize;
    let mut tokens_used = 0usize;

    for (_, text, source) in &compressed {
        let entry_tokens = estimate_tokens(text);
        if tokens_used + entry_tokens > config.max_tokens {
            break;
        }
        tokens_used += entry_tokens;
        retained += 1;
        by_source.entry(*source).or_default().push(text.as_str());
    }

    // Merge sources that render under the same header so each header appears
    // once. Section order still follows the alphabetical source order of the
    // first source contributing to each header.
    let mut sections: Vec<(String, Vec<&str>)> = Vec::new();
    for (source, texts) in by_source {
        let header = source_header(source);
        match sections.iter_mut().find(|(existing, _)| *existing == header) {
            Some((_, bucket)) => bucket.extend(texts),
            None => sections.push((header, texts)),
        }
    }

    let mut output = String::new();
    for (header, texts) in &sections {
        output.push_str(header);
        output.push('\n');
        for text in texts {
            output.push_str("- ");
            output.push_str(text);
            output.push('\n');
        }
        output.push('\n');
    }

    // Report the token estimate for exactly the text that is returned.
    let text = output.trim_end().to_string();
    let tokens = estimate_tokens(&text);

    CompactedContext {
        text,
        entries_retained: retained,
        entries_dropped: entries.len().saturating_sub(retained),
        tokens,
    }
}

/// Compress a single entry to its minimal form.
///
/// Steps, in order:
///
/// 1. Trim, then remove markdown artifacts (`**`, `### `, `## `).
/// 2. Collapse multi-line text into one line, joining non-empty trimmed lines
///    with `"; "`.
/// 3. Remove one leading `- ` and one leading `• ` list marker.
/// 4. Remove every bracketed span that looks like retrieval metadata (contains
///    `|`, `sim=` or `_memory`), wherever it occurs. Other bracketed text is
///    kept. The words on either side of a removed span stay separated by a
///    single space.
/// 5. If the result is longer than `max_chars` characters (Unicode scalar
///    values, not bytes), cut it to `max_chars`, back up to the last space when
///    that space lies in the second half of the allowed length, and append
///    `...`.
///
/// Returns the compressed text; it is at most `max_chars + 3` characters long.
fn compress_entry(content: &str, max_chars: usize) -> String {
    // Longest heading marker first: "### " contains "## ", so the reverse order
    // would leave a stray '#' behind.
    let mut text = content
        .trim()
        .replace("**", "")
        .replace("### ", "")
        .replace("## ", "");

    // Collapse multi-line entries to a single line.
    if text.contains('\n') {
        text = text
            .lines()
            .map(str::trim)
            .filter(|line| !line.is_empty())
            .collect::<Vec<_>>()
            .join("; ");
    }

    // Strip leading bullet/list markers.
    if let Some(rest) = text.strip_prefix("- ") {
        text = rest.to_string();
    }
    if let Some(rest) = text.strip_prefix("• ") {
        text = rest.to_string();
    }

    // Strip metadata brackets like "[episodic_memory | sim=0.85]" anywhere.
    // `cursor` is the byte offset scanning resumes from; it always sits on a
    // char boundary because it is derived from `find` results or prefix lengths.
    let mut cursor = 0;
    while let Some(open) = text[cursor..].find('[').map(|i| cursor + i) {
        let Some(close) = text[open..].find(']').map(|i| open + i) else {
            // Unterminated bracket: nothing more can be matched.
            break;
        };

        let is_metadata = {
            let inner = &text[open + 1..close];
            inner.contains('|') || inner.contains("sim=") || inner.contains("_memory")
        };
        if !is_metadata {
            // Keep ordinary bracketed text and keep looking after it.
            cursor = close + 1;
            continue;
        }

        let left = text[..open].trim_end();
        let right = text[close + 1..].trim_start();
        // Re-insert one space so neighbouring words are not fused, unless the
        // remainder starts with closing punctuation.
        let needs_space = !left.is_empty()
            && right
                .chars()
                .next()
                .is_some_and(|c| !matches!(c, '.' | ',' | ';' | ':' | '!' | '?' | ')'));

        let mut joined = String::with_capacity(left.len() + right.len() + 1);
        joined.push_str(left);
        if needs_space {
            joined.push(' ');
        }
        joined.push_str(right);

        cursor = left.len();
        text = joined;
    }

    // Truncate to max chars. `nth(max_chars)` yields a value only when the text
    // has more than `max_chars` characters, and gives the byte offset to cut at.
    if let Some((byte_end, _)) = text.char_indices().nth(max_chars) {
        text.truncate(byte_end);
        // Clean truncation — don't cut mid-word, but only back up when the last
        // space is past the halfway point (measured in characters).
        let word_break = text
            .rfind(' ')
            .filter(|&idx| text[..idx].chars().count() > max_chars / 2);
        if let Some(idx) = word_break {
            text.truncate(idx);
        }
        text.push_str("...");
    }

    text
}

/// Compute a priority score combining recency, relevance, and importance.
/// Higher = more important to retain.
fn compute_priority(entry: &ContextEntry) -> f64 {
    let importance_norm = entry.importance as f64 / 10.0; // 0.0-1.0
    let relevance = entry.relevance as f64; // 0.0-1.0

    // Recency: exponential decay with 1-hour half-life
    let recency = if entry.age_seconds == 0 {
        1.0
    } else {
        (-0.693 * entry.age_seconds as f64 / 3600.0).exp() // ln(2)/3600
    };

    // Weighted combination: relevance dominates when present,
    // recency fills in when relevance is 0 (ambient entries)
    if relevance > 0.1 {
        0.4 * relevance + 0.3 * importance_norm + 0.3 * recency
    } else {
        0.2 * importance_norm + 0.8 * recency
    }
}

/// Drop every entry whose compressed text overlaps an earlier surviving entry
/// by more than `threshold`.
///
/// Entries are examined in list order and compared only against entries that
/// have already survived, so of any group of near-duplicates the one that
/// appears first is kept. Relative order of survivors is unchanged.
fn deduplicate(entries: &mut Vec<(f64, String, &str)>, threshold: f64) {
    let candidates = std::mem::take(entries);
    let mut survivors = Vec::with_capacity(candidates.len());

    for candidate in candidates {
        let repeats_earlier = survivors
            .iter()
            .any(|kept: &(f64, String, &str)| text_overlap(&kept.1, &candidate.1) > threshold);
        if !repeats_earlier {
            survivors.push(candidate);
        }
    }

    *entries = survivors;
}

/// Public text overlap score for topic detection and deduplication.
///
/// Thin public wrapper around the module-private `text_overlap`: a cheap
/// Jaccard similarity on lowercased word trigrams — 1.0 = identical trigram
/// sets, 0.0 = no overlap. Texts with fewer than three words are compared by
/// their individual words instead of trigrams.
pub fn text_overlap_score(a: &str, b: &str) -> f64 {
    text_overlap(a, b)
}

/// Jaccard similarity of the word-trigram sets of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`. Two texts that both produce no trigrams
/// (e.g. both empty) are considered identical and score 1.0.
fn text_overlap(a: &str, b: &str) -> f64 {
    let left = word_trigrams(a);
    let right = word_trigrams(b);

    if left.is_empty() && right.is_empty() {
        return 1.0;
    }

    // |A ∪ B| = |A| + |B| − |A ∩ B|; non-zero here because at least one set
    // is non-empty.
    let shared = left.iter().filter(|gram| right.contains(*gram)).count();
    let combined = left.len() + right.len() - shared;
    shared as f64 / combined as f64
}

/// Build the set of ASCII-lowercased word trigrams of `text`.
///
/// Words are split on whitespace and each trigram is three consecutive words
/// joined by single spaces. Texts with fewer than three words yield the set of
/// their individual lowercased words instead.
fn word_trigrams(text: &str) -> std::collections::HashSet<String> {
    let tokens: Vec<String> = text
        .split_whitespace()
        .map(str::to_ascii_lowercase)
        .collect();

    match tokens.len() {
        0..=2 => tokens.into_iter().collect(),
        _ => tokens.windows(3).map(|gram| gram.join(" ")).collect(),
    }
}

/// Shrink a pre-formatted memory text block (from the retriever) so that it
/// fits within `max_tokens`. This is the string-level compaction entry point
/// used at the retrieval → context assembly boundary.
///
/// Input is expected to look like the retriever's `[Active Memory]` /
/// `[Working Memory]` / `[Relevant Memories]` / `[Recent Activity]` blocks:
/// bracketed header lines followed by entry lines.
///
/// Behaviour:
///
/// * Empty input or a zero budget yields an empty string.
/// * Text already within budget is returned unchanged.
/// * Otherwise blank lines are skipped, lines that start with `[` and end with
///   `]` are copied as section headers (preceded by a blank line unless first),
///   and every other line goes through `compress_entry` with a 150-character
///   limit, which also removes a leading bullet marker.
/// * Lines are emitted top to bottom; processing stops at the first line whose
///   estimated tokens would exceed the budget, so later lines are dropped.
pub fn compact_text(text: &str, max_tokens: usize) -> String {
    let nothing_to_emit = text.is_empty() || max_tokens == 0;
    if nothing_to_emit {
        return String::new();
    }

    // Fast path: already within budget, hand the text back untouched.
    if crate::context::estimate_tokens(text) <= max_tokens {
        return text.to_string();
    }

    let mut assembled = String::new();
    let mut spent = 0usize;

    let meaningful_lines = text.lines().map(str::trim).filter(|line| !line.is_empty());
    for line in meaningful_lines {
        // Section headers pass through unchanged; everything else is compressed.
        let is_header = line.starts_with('[') && line.ends_with(']');
        let rendered: String = if is_header {
            line.to_string()
        } else {
            compress_entry(line, 150)
        };

        let cost = crate::context::estimate_tokens(&rendered);
        if spent + cost > max_tokens {
            break;
        }

        // Separate sections with a blank line, except before the first one.
        if is_header && !assembled.is_empty() {
            assembled.push('\n');
        }
        assembled.push_str(&rendered);
        assembled.push('\n');
        spent += cost;
    }

    assembled.trim_end().to_string()
}

/// Translate a source tier name into the bracketed section header used in the
/// compacted output.
///
/// Known tiers get a human-readable label ("episodic" and "semantic" share
/// one); any other value is wrapped in brackets as-is.
fn source_header(source: &str) -> String {
    let label = match source {
        "working" => "Working Memory",
        "ambient" => "Recent Activity",
        "episodic" | "semantic" => "Relevant Memories",
        "procedural" => "Skills",
        "relationship" => "Relationships",
        "checkpoint" => "Session Context",
        "hippocampus" => "Storage",
        "topic_summary" => "Earlier Topics",
        unknown => unknown,
    };
    format!("[{label}]")
}

// Unit tests for the compaction pipeline and its private helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand constructor for a `ContextEntry` from borrowed strings.
    fn entry(
        source: &str,
        content: &str,
        importance: f32,
        age_secs: u64,
        relevance: f32,
    ) -> ContextEntry {
        ContextEntry {
            source: source.to_string(),
            content: content.to_string(),
            importance,
            age_seconds: age_secs,
            relevance,
        }
    }

    // No entries in, nothing out.
    #[test]
    fn empty_input_produces_empty_output() {
        let result = compact(&[], &CompactionConfig::default());
        assert!(result.text.is_empty());
        assert_eq!(result.entries_retained, 0);
    }

    // A lone entry that fits the budget is kept and its text is present.
    #[test]
    fn single_entry_passes_through() {
        let entries = vec![entry(
            "episodic",
            "User asked about workspace cleanup",
            5.0,
            300,
            0.8,
        )];
        let result = compact(&entries, &CompactionConfig::default());
        assert!(result.text.contains("workspace cleanup"));
        assert_eq!(result.entries_retained, 1);
        assert_eq!(result.entries_dropped, 0);
    }

    // Identical text from two different tiers collapses to one entry.
    #[test]
    fn duplicates_are_removed() {
        let entries = vec![
            entry(
                "episodic",
                "Agent cleaned up workspace files",
                5.0,
                300,
                0.8,
            ),
            entry("ambient", "Agent cleaned up workspace files", 5.0, 300, 0.0),
        ];
        let result = compact(&entries, &CompactionConfig::default());
        assert_eq!(result.entries_retained, 1);
        assert_eq!(result.entries_dropped, 1);
    }

    // With far more content than budget, entries are dropped and the reported
    // token count stays close to the ceiling.
    #[test]
    fn budget_enforced() {
        let entries: Vec<ContextEntry> = (0..100)
            .map(|i| {
                entry(
                    "episodic",
                    &format!("Memory entry number {i} with some content to take up space"),
                    5.0,
                    i * 60,
                    0.5,
                )
            })
            .collect();
        let config = CompactionConfig {
            max_tokens: 100,
            ..Default::default()
        };
        let result = compact(&entries, &config);
        assert!(result.tokens <= 110); // small margin for estimation
        assert!(result.entries_retained < 100);
        assert!(result.entries_dropped > 0);
    }

    // Under a very tight budget the lowest-scoring entry is the one to go.
    #[test]
    fn high_priority_entries_retained_first() {
        let entries = vec![
            entry("episodic", "Old low-relevance memory", 1.0, 7200, 0.1),
            entry("ambient", "Very recent high-importance fact", 9.0, 30, 0.0),
            entry("semantic", "Highly relevant stored fact", 5.0, 3600, 0.9),
        ];
        let config = CompactionConfig {
            max_tokens: 15, // extremely tight — can only fit ~1 entry with header
            ..Default::default()
        };
        let result = compact(&entries, &config);
        // Should retain at least the highest-priority entry
        assert!(result.entries_retained >= 1);
        // With such a tight budget, the old low-relevance memory should be
        // dropped in favor of higher-priority entries (recent or relevant).
        if result.entries_retained < 3 {
            assert!(!result.text.contains("Old low-relevance"));
        }
    }

    // Bold markers, newlines and metadata brackets are all removed.
    #[test]
    fn compress_strips_formatting() {
        let raw = "**Important**: This is a [episodic_memory | sim=0.85] formatted entry\nWith multiple lines\nAnd verbose content";
        let compressed = compress_entry(raw, 200);
        assert!(!compressed.contains("**"));
        assert!(!compressed.contains('\n'));
        assert!(!compressed.contains("sim=0.85"));
    }

    // Over-long text is cut near the limit and marked with an ellipsis.
    #[test]
    fn compress_truncates_long_entries() {
        let long = "a ".repeat(200);
        let compressed = compress_entry(&long, 50);
        assert!(compressed.len() < 60); // 50 + "..."
        assert!(compressed.ends_with("..."));
    }

    // Same importance: the fresh, relevant entry must outscore the stale one.
    #[test]
    fn priority_favors_recent_relevant_entries() {
        let recent_relevant = ContextEntry {
            source: "episodic".into(),
            content: "test".into(),
            importance: 5.0,
            age_seconds: 60,
            relevance: 0.9,
        };
        let old_irrelevant = ContextEntry {
            source: "episodic".into(),
            content: "test".into(),
            importance: 5.0,
            age_seconds: 86400,
            relevance: 0.1,
        };
        assert!(compute_priority(&recent_relevant) > compute_priority(&old_irrelevant));
    }

    // Identical strings score (approximately) 1.0.
    #[test]
    fn text_overlap_identical() {
        assert!((text_overlap("the quick brown fox", "the quick brown fox") - 1.0).abs() < 0.01);
    }

    // Strings with no shared trigrams score near 0.0.
    #[test]
    fn text_overlap_different() {
        assert!(text_overlap("the quick brown fox", "completely different words here") < 0.1);
    }

    // Each source tier present in the output gets its mapped section header.
    #[test]
    fn grouped_output_has_section_headers() {
        let entries = vec![
            entry("ambient", "Recent thing happened", 5.0, 60, 0.0),
            entry("procedural", "How to run a scan", 5.0, 3600, 0.7),
        ];
        let result = compact(&entries, &CompactionConfig::default());
        assert!(result.text.contains("[Recent Activity]"));
        assert!(result.text.contains("[Skills]"));
    }
}