Skip to main content

oxios_memory/memory/
compaction.rs

1//! Compaction tree — 5-level memory compression hierarchy.
2//!
3//! Raw → Daily → Weekly → Monthly → Root
4//!
5//! Older memories are progressively compressed into higher-level summaries,
6//! preserving key information while reducing storage and context size.
7//!
8//! ## Levels
9//!
10//! | Level   | Value | Threshold | Description                        |
11//! |---------|-------|-----------|------------------------------------|
12//! | Raw     | 0     | 200 lines | Uncompressed session data          |
13//! | Daily   | 1     | 300 lines | Compressed from raw (per-day)      |
14//! | Weekly  | 2     | 500 lines | Compressed from daily (per-week)   |
15//! | Monthly | 3     | ∞         | Compressed from weekly (per-month) |
16//! | Root    | 4     | ∞         | Top-level index entry              |
17
18use serde::{Deserialize, Serialize};
19
20// ---------------------------------------------------------------------------
21// CompactionLevel
22// ---------------------------------------------------------------------------
23
24/// Compaction level in the compression hierarchy.
25#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
26pub enum CompactionLevel {
27    /// Raw session data (uncompressed).
28    Raw = 0,
29    /// Daily summary (compressed from raw).
30    Daily = 1,
31    /// Weekly summary (compressed from weekly).
32    Weekly = 2,
33    /// Monthly summary (compressed from weekly).
34    Monthly = 3,
35    /// Root index entry (top-level summary).
36    Root = 4,
37}
38
39impl CompactionLevel {
40    /// Line count threshold for triggering compaction at this level.
41    ///
42    /// When content at this level exceeds the threshold, it becomes a
43    /// candidate for promotion to the next level.
44    pub fn threshold(&self) -> usize {
45        match self {
46            CompactionLevel::Raw => 200,
47            CompactionLevel::Daily => 300,
48            CompactionLevel::Weekly => 500,
49            CompactionLevel::Monthly => usize::MAX,
50            CompactionLevel::Root => usize::MAX,
51        }
52    }
53
54    /// Storage subdirectory name for this level.
55    pub fn dir_name(&self) -> &'static str {
56        match self {
57            CompactionLevel::Raw => "raw",
58            CompactionLevel::Daily => "daily",
59            CompactionLevel::Weekly => "weekly",
60            CompactionLevel::Monthly => "monthly",
61            CompactionLevel::Root => "root",
62        }
63    }
64
65    /// All levels in ascending order.
66    pub fn all() -> &'static [CompactionLevel] {
67        &[
68            CompactionLevel::Raw,
69            CompactionLevel::Daily,
70            CompactionLevel::Weekly,
71            CompactionLevel::Monthly,
72            CompactionLevel::Root,
73        ]
74    }
75
76    /// Get the next higher compaction level.
77    ///
78    /// Returns `None` for `Root` (topmost level).
79    pub fn next(&self) -> Option<CompactionLevel> {
80        match self {
81            CompactionLevel::Raw => Some(CompactionLevel::Daily),
82            CompactionLevel::Daily => Some(CompactionLevel::Weekly),
83            CompactionLevel::Weekly => Some(CompactionLevel::Monthly),
84            CompactionLevel::Monthly => Some(CompactionLevel::Root),
85            CompactionLevel::Root => None,
86        }
87    }
88
89    /// Numeric value of this level.
90    pub fn as_u8(&self) -> u8 {
91        *self as u8
92    }
93
94    /// Try to convert a u8 to a CompactionLevel.
95    pub fn from_u8(v: u8) -> Option<Self> {
96        match v {
97            0 => Some(CompactionLevel::Raw),
98            1 => Some(CompactionLevel::Daily),
99            2 => Some(CompactionLevel::Weekly),
100            3 => Some(CompactionLevel::Monthly),
101            4 => Some(CompactionLevel::Root),
102            _ => None,
103        }
104    }
105
106    /// Compression ratio for content being promoted *from* this level.
107    ///
108    /// The ratio indicates what fraction of lines to keep. Lower levels
109    /// compress more aggressively because the source data is more verbose.
110    fn compression_ratio(&self) -> f64 {
111        match self {
112            CompactionLevel::Raw => 0.30,     // Keep 30% of raw lines
113            CompactionLevel::Daily => 0.40,   // Keep 40% of daily lines
114            CompactionLevel::Weekly => 0.50,  // Keep 50% of weekly lines
115            CompactionLevel::Monthly => 0.60, // Keep 60% of monthly lines
116            CompactionLevel::Root => 1.0,     // Root is never compressed further
117        }
118    }
119
120    /// Target number of summary lines when promoting content at this level.
121    fn target_summary_lines(&self) -> usize {
122        match self {
123            CompactionLevel::Raw => 15,
124            CompactionLevel::Daily => 20,
125            CompactionLevel::Weekly => 10,
126            CompactionLevel::Monthly => 5,
127            CompactionLevel::Root => 0,
128        }
129    }
130}
131
132// ---------------------------------------------------------------------------
133// CompactionTree
134// ---------------------------------------------------------------------------
135
136/// Compaction tree manager.
137///
138/// Manages the 5-level compression hierarchy. Dream calls `compact()` to
139/// promote entries up the tree when they exceed size thresholds.
140///
141/// # Usage
142///
143/// ```ignore
144/// let tree = CompactionTree::default_tree();
145///
146/// // Check if content should be promoted
147/// if tree.should_promote("long content...", CompactionLevel::Raw) {
148///     let compacted = tree.compact_to_level("long content...", CompactionLevel::Raw, CompactionLevel::Daily);
149/// }
150///
151/// // Promote several entries at once
152/// let entries = vec!["entry 1".to_string(), "entry 2".to_string()];
153/// let daily_summary = tree.promote(&entries, CompactionLevel::Raw);
154/// ```
155pub struct CompactionTree {
156    /// Line count threshold for triggering compaction.
157    pub line_threshold: usize,
158}
159
160impl CompactionTree {
161    /// Create a new compaction tree with the given line threshold.
162    pub fn new(line_threshold: usize) -> Self {
163        Self { line_threshold }
164    }
165
166    /// Create with default threshold (200 lines).
167    pub fn default_tree() -> Self {
168        Self::new(200)
169    }
170
171    /// Check if content should be compacted based on line count.
172    ///
173    /// Uses the tree's configured `line_threshold`.
174    pub fn should_compact(&self, content: &str) -> bool {
175        content.lines().count() >= self.line_threshold
176    }
177
178    /// Check if content at a given level should be promoted to the next level.
179    ///
180    /// Promotion is indicated when the content exceeds the threshold for
181    /// its current compaction level.
182    pub fn should_promote(&self, content: &str, current_level: CompactionLevel) -> bool {
183        let line_count = content.lines().count();
184        line_count >= current_level.threshold()
185    }
186
187    /// Compact content from one level to another.
188    ///
189    /// This applies rule-based compression appropriate for the target level.
190    /// If `to` is not strictly higher than `from`, the content is returned
191    /// unchanged.
192    ///
193    /// # Multi-level compaction
194    ///
195    /// When compacting across multiple levels (e.g., Raw → Weekly), the
196    /// compression is applied progressively through each intermediate level
197    /// for better quality.
198    pub fn compact_to_level(
199        &self,
200        content: &str,
201        from: CompactionLevel,
202        to: CompactionLevel,
203    ) -> String {
204        if to.as_u8() <= from.as_u8() {
205            // Cannot compact to same or lower level
206            return content.to_string();
207        }
208
209        // Progressive compaction through each intermediate level
210        let mut current_content = content.to_string();
211        let mut current_level = from;
212
213        while current_level < to {
214            let next_level = match current_level.next() {
215                Some(n) => n,
216                None => break,
217            };
218            current_content = self.compact_single_level(&current_content, current_level);
219            current_level = next_level;
220        }
221
222        current_content
223    }
224
225    /// Promote multiple entries from a given level into a single summary
226    /// at the next level.
227    ///
228    /// This is used by the dream process to consolidate several entries
229    /// (e.g., multiple daily summaries) into one higher-level summary
230    /// (e.g., a weekly summary).
231    ///
232    /// # Returns
233    ///
234    /// A compacted summary string combining all entries.
235    pub fn promote(&self, entries: &[String], level: CompactionLevel) -> String {
236        if entries.is_empty() {
237            return String::new();
238        }
239
240        if entries.len() == 1 {
241            // Single entry: just compact it
242            return self.compact_single_level(&entries[0], level);
243        }
244
245        // Combine entries with section markers
246        let combined = entries.join("\n---\n");
247
248        // Apply compaction for the source level
249        let compacted = self.compact_single_level(&combined, level);
250
251        // Add header indicating promotion
252        let header = match level.next() {
253            Some(next) => format!(
254                "[{} summary from {} entries]",
255                next.dir_name(),
256                entries.len()
257            ),
258            None => format!("[Root summary from {} entries]", entries.len()),
259        };
260
261        format!("{header}\n{compacted}")
262    }
263
264    /// Simple rule-based compaction: preserve first/last sentences of each
265    /// paragraph, discard middle.
266    ///
267    /// This is the fallback when LLM compaction is not available.
268    pub fn rule_based_compact(&self, content: &str) -> String {
269        self.compact_single_level(content, CompactionLevel::Raw)
270    }
271
272    // -----------------------------------------------------------------------
273    // Internal helpers
274    // -----------------------------------------------------------------------
275
276    /// Compress content for promotion from a single level to the next.
277    ///
278    /// Uses a rule-based strategy:
279    /// 1. If content is short enough, return as-is.
280    /// 2. Extract key lines: section headers, first/last lines of sections.
281    /// 3. Apply compression ratio to determine how many lines to keep.
282    /// 4. Preserve structural markers (headers, separators).
283    fn compact_single_level(&self, content: &str, from_level: CompactionLevel) -> String {
284        let lines: Vec<&str> = content.lines().collect();
285
286        // Short content: no compaction needed
287        if lines.len() < 5 {
288            return content.to_string();
289        }
290
291        let ratio = from_level.compression_ratio();
292        let target = from_level.target_summary_lines();
293        let keep_count = ((lines.len() as f64) * ratio) as usize;
294        let keep_count = keep_count.max(target).min(lines.len());
295
296        // If we'd keep everything, skip
297        if keep_count >= lines.len() {
298            return content.to_string();
299        }
300
301        // Strategy: keep structural lines + head + tail
302        let mut kept_indices = std::collections::HashSet::new();
303
304        // 1. Always preserve section markers (lines starting with # or ---)
305        for (i, line) in lines.iter().enumerate() {
306            let trimmed = line.trim();
307            if trimmed.starts_with('#')
308                || trimmed.starts_with("---")
309                || trimmed.starts_with("===")
310                || trimmed.starts_with('[')
311                || trimmed.starts_with('*')
312                || trimmed.is_empty()
313            {
314                kept_indices.insert(i);
315            }
316        }
317
318        // 2. Preserve first N non-structural lines
319        let head_count = (keep_count / 3).max(2);
320        let mut head_taken = 0;
321        for (i, _line) in lines.iter().enumerate() {
322            if head_taken >= head_count {
323                break;
324            }
325            if !kept_indices.contains(&i) {
326                kept_indices.insert(i);
327                head_taken += 1;
328            }
329        }
330
331        // 3. Preserve last N non-structural lines
332        let tail_count = (keep_count / 3).max(2);
333        let mut tail_taken = 0;
334        for (i, _line) in lines.iter().enumerate().rev() {
335            if tail_taken >= tail_count {
336                break;
337            }
338            if !kept_indices.contains(&i) {
339                kept_indices.insert(i);
340                tail_taken += 1;
341            }
342        }
343
344        // 4. If still under target, sample middle lines at even intervals
345        if kept_indices.len() < keep_count {
346            let remaining = keep_count - kept_indices.len();
347            let middle_lines: Vec<usize> = (0..lines.len())
348                .filter(|i| !kept_indices.contains(i))
349                .collect();
350
351            if !middle_lines.is_empty() {
352                let step = (middle_lines.len() as f64 / remaining as f64).max(1.0) as usize;
353                for idx in (0..middle_lines.len()).step_by(step) {
354                    if kept_indices.len() >= keep_count {
355                        break;
356                    }
357                    kept_indices.insert(middle_lines[idx]);
358                }
359            }
360        }
361
362        // Build result in order
363        let mut sorted_indices: Vec<usize> = kept_indices.into_iter().collect();
364        sorted_indices.sort_unstable();
365
366        let mut result = Vec::new();
367        let mut last_idx = 0isize;
368        for idx in sorted_indices {
369            if last_idx >= 0 && (idx as isize) > last_idx + 1 {
370                // Gap detected — add omission marker
371                let omitted = idx - (last_idx as usize) - 1;
372                result.push(format!("... ({omitted} lines omitted) ..."));
373            }
374            result.push(lines[idx].to_string());
375            last_idx = idx as isize;
376        }
377
378        // Trailing omitted lines
379        if (last_idx as usize) < lines.len() - 1 {
380            let omitted = lines.len() - 1 - (last_idx as usize);
381            result.push(format!("... ({omitted} lines omitted) ..."));
382        }
383
384        result.join("\n")
385    }
386}
387
388// ---------------------------------------------------------------------------
389// Tests
390// ---------------------------------------------------------------------------
391
392#[cfg(test)]
393mod tests {
394    use super::*;
395
396    #[test]
397    fn test_compaction_level_next() {
398        assert_eq!(CompactionLevel::Raw.next(), Some(CompactionLevel::Daily));
399        assert_eq!(CompactionLevel::Daily.next(), Some(CompactionLevel::Weekly));
400        assert_eq!(
401            CompactionLevel::Weekly.next(),
402            Some(CompactionLevel::Monthly)
403        );
404        assert_eq!(CompactionLevel::Monthly.next(), Some(CompactionLevel::Root));
405        assert_eq!(CompactionLevel::Root.next(), None);
406    }
407
408    #[test]
409    fn test_compaction_level_u8_roundtrip() {
410        for level in CompactionLevel::all() {
411            assert_eq!(CompactionLevel::from_u8(level.as_u8()), Some(*level));
412        }
413        assert_eq!(CompactionLevel::from_u8(5), None);
414    }
415
416    #[test]
417    fn test_compaction_level_thresholds() {
418        assert_eq!(CompactionLevel::Raw.threshold(), 200);
419        assert_eq!(CompactionLevel::Daily.threshold(), 300);
420        assert_eq!(CompactionLevel::Weekly.threshold(), 500);
421        assert_eq!(CompactionLevel::Monthly.threshold(), usize::MAX);
422        assert_eq!(CompactionLevel::Root.threshold(), usize::MAX);
423    }
424
425    #[test]
426    fn test_should_compact_short() {
427        let tree = CompactionTree::new(10);
428        let content = "line 1\nline 2\nline 3";
429        assert!(!tree.should_compact(content));
430    }
431
432    #[test]
433    fn test_should_compact_long() {
434        let tree = CompactionTree::new(5);
435        let content = (0..10)
436            .map(|i| format!("line {}", i))
437            .collect::<Vec<_>>()
438            .join("\n");
439        assert!(tree.should_compact(&content));
440    }
441
442    #[test]
443    fn test_should_promote_at_threshold() {
444        let tree = CompactionTree::default_tree();
445        // Create content with 200+ lines (Raw threshold)
446        let content: String = (0..200)
447            .map(|i| format!("line {}", i))
448            .collect::<Vec<_>>()
449            .join("\n");
450        assert!(tree.should_promote(&content, CompactionLevel::Raw));
451    }
452
453    #[test]
454    fn test_should_not_promote_below_threshold() {
455        let tree = CompactionTree::default_tree();
456        let content = "line 1\nline 2\nline 3";
457        assert!(!tree.should_promote(content, CompactionLevel::Raw));
458    }
459
460    #[test]
461    fn test_rule_based_compact_short() {
462        let tree = CompactionTree::new(10);
463        let content = "line 1\nline 2\nline 3";
464        let result = tree.rule_based_compact(content);
465        assert_eq!(result, content);
466    }
467
468    #[test]
469    fn test_rule_based_compact_long() {
470        let tree = CompactionTree::new(10);
471        let content = (0..50)
472            .map(|i| format!("line {}", i))
473            .collect::<Vec<_>>()
474            .join("\n");
475        let result = tree.rule_based_compact(&content);
476        assert!(result.lines().count() < 50, "Should be compacted");
477        assert!(result.contains("line 0"), "Should preserve first line");
478        assert!(result.contains("line 49"), "Should preserve last line");
479        assert!(
480            result.contains("omitted"),
481            "Should indicate omitted content"
482        );
483    }
484
485    #[test]
486    fn test_compact_to_level_same_level() {
487        let tree = CompactionTree::default_tree();
488        let content = "line 1\nline 2\nline 3\nline 4\nline 5";
489        let result = tree.compact_to_level(content, CompactionLevel::Raw, CompactionLevel::Raw);
490        assert_eq!(result, content, "Same level should return unchanged");
491    }
492
493    #[test]
494    fn test_compact_to_level_lower_level() {
495        let tree = CompactionTree::default_tree();
496        let content = "line 1\nline 2\nline 3";
497        let result = tree.compact_to_level(content, CompactionLevel::Daily, CompactionLevel::Raw);
498        assert_eq!(
499            result, content,
500            "Compacting to lower level should return unchanged"
501        );
502    }
503
504    #[test]
505    fn test_compact_to_level_single_step() {
506        let tree = CompactionTree::default_tree();
507        let content: String = (0..50)
508            .map(|i| format!("line {}", i))
509            .collect::<Vec<_>>()
510            .join("\n");
511
512        let result = tree.compact_to_level(&content, CompactionLevel::Raw, CompactionLevel::Daily);
513        assert!(
514            result.lines().count() < content.lines().count(),
515            "Should be compacted"
516        );
517        assert!(result.contains("line 0"), "Should preserve first line");
518    }
519
520    #[test]
521    fn test_compact_to_level_multi_step() {
522        let tree = CompactionTree::default_tree();
523        let content: String = (0..100)
524            .map(|i| format!("line {}", i))
525            .collect::<Vec<_>>()
526            .join("\n");
527
528        // Raw → Weekly (2 steps: Raw→Daily→Weekly)
529        let result = tree.compact_to_level(&content, CompactionLevel::Raw, CompactionLevel::Weekly);
530        assert!(
531            result.lines().count() < 100,
532            "Multi-step compaction should reduce size"
533        );
534    }
535
536    #[test]
537    fn test_compact_preserves_headers() {
538        let tree = CompactionTree::default_tree();
539        let content = "# Header 1\n\
540                       line 1\nline 2\nline 3\nline 4\nline 5\n\
541                       # Header 2\n\
542                       line 6\nline 7\nline 8\nline 9\nline 10";
543
544        let result = tree.compact_single_level(content, CompactionLevel::Raw);
545        assert!(result.contains("# Header 1"), "Should preserve headers");
546        assert!(result.contains("# Header 2"), "Should preserve headers");
547    }
548
549    #[test]
550    fn test_promote_multiple_entries() {
551        let tree = CompactionTree::default_tree();
552        let entries: Vec<String> = (0..3)
553            .map(|i| {
554                (0..20)
555                    .map(|j| format!("entry {} line {}", i, j))
556                    .collect::<Vec<_>>()
557                    .join("\n")
558            })
559            .collect();
560
561        let result = tree.promote(&entries, CompactionLevel::Raw);
562        assert!(
563            result.contains("[daily summary from 3 entries]"),
564            "Should include promotion header"
565        );
566        // Result should be shorter than the sum of all entries
567        let total_original: usize = entries.iter().map(|e| e.lines().count()).sum();
568        assert!(
569            result.lines().count() < total_original,
570            "Promoted result should be shorter than combined originals"
571        );
572    }
573
574    #[test]
575    fn test_promote_single_entry() {
576        let tree = CompactionTree::default_tree();
577        let entries = vec![(0..20)
578            .map(|i| format!("line {}", i))
579            .collect::<Vec<_>>()
580            .join("\n")];
581
582        let result = tree.promote(&entries, CompactionLevel::Raw);
583        // Single entry should just be compacted (no header)
584        assert!(
585            !result.contains("summary from"),
586            "Single entry should not have multi-entry header"
587        );
588    }
589
590    #[test]
591    fn test_promote_empty() {
592        let tree = CompactionTree::default_tree();
593        let result = tree.promote(&[], CompactionLevel::Raw);
594        assert!(result.is_empty(), "Empty input should return empty string");
595    }
596
597    #[test]
598    fn test_compact_single_level_very_short() {
599        let tree = CompactionTree::default_tree();
600        let content = "line 1\nline 2";
601        let result = tree.compact_single_level(content, CompactionLevel::Raw);
602        assert_eq!(
603            result, content,
604            "Very short content should not be compacted"
605        );
606    }
607
608    #[test]
609    fn test_dir_name() {
610        assert_eq!(CompactionLevel::Raw.dir_name(), "raw");
611        assert_eq!(CompactionLevel::Daily.dir_name(), "daily");
612        assert_eq!(CompactionLevel::Weekly.dir_name(), "weekly");
613        assert_eq!(CompactionLevel::Monthly.dir_name(), "monthly");
614        assert_eq!(CompactionLevel::Root.dir_name(), "root");
615    }
616}