Skip to main content

memory_core/autonomous/
compression.rs

1use std::collections::HashSet;
2
/// Default cutoff used by [`should_store`]: a value is kept only when its
/// [`information_score`] is greater than or equal to this number.
const ENTROPY_THRESHOLD: f64 = 0.35;
4
/// Conversational filler phrases that count against a value's score.
///
/// Entries are lowercase and use the plain ASCII apostrophe; [`information_score`]
/// lowercases its input and normalizes typographic apostrophes before matching.
const FILLER_PHRASES: &[&str] = &[
    "i'll help",
    "let me",
    "sure thing",
    "of course",
    "no problem",
    "here's what",
    "as you can see",
    "happy to help",
    "certainly",
    "absolutely",
];

/// Returns `true` when `phrase` occurs in `text` with no alphanumeric character
/// directly before or after it.
///
/// This keeps "certainly" from matching inside "uncertainly" and "let me" from
/// matching inside "tablet memory".
fn contains_whole_phrase(text: &str, phrase: &str) -> bool {
    text.match_indices(phrase).any(|(start, hit)| {
        let end = start + hit.len();
        // `start` and `end` come from `match_indices`, so both are char boundaries.
        !text[..start].ends_with(char::is_alphanumeric)
            && !text[end..].starts_with(char::is_alphanumeric)
    })
}

/// Score the information density of a memory value.
///
/// The score is the sum of five heuristic factors, clamped to `0.0..=1.0`:
///
/// 1. Vocabulary diversity: unique (case-insensitive) words / total words, weighted 0.25.
/// 2. Specific identifiers: `+0.2` if the text contains `/`, `::`, `.` or `_`.
///    Note that `.` also matches ordinary sentence punctuation.
/// 3. Code or structured data: `+0.15` if the text contains `{`, `(`, `[` or a backtick.
/// 4. Length in words: `0.0` below 3, `0.1` for 3–5, `0.2` for 6–99, `0.15` from 100 up.
/// 5. Filler: `-0.15` for each distinct entry of [`FILLER_PHRASES`] found as a whole
///    phrase, or `+0.05` when none is present.
///
/// The positive weights add up to 0.85, so that is the highest score this function
/// can return. Empty or whitespace-only input scores `0.0`.
pub fn information_score(value: &str) -> f64 {
    let words: Vec<&str> = value.split_whitespace().collect();
    if words.is_empty() {
        return 0.0;
    }
    let word_count = words.len();

    let mut score = 0.0;

    // Factor 1: Unique word ratio (vocabulary diversity)
    let distinct: HashSet<String> = words.iter().map(|w| w.to_lowercase()).collect();
    let uniqueness = distinct.len() as f64 / word_count as f64;
    score += uniqueness * 0.25;

    // Factor 2: Contains specific identifiers (paths, qualified names, config keys)
    let has_specifics =
        value.contains('/') || value.contains("::") || value.contains('.') || value.contains('_');
    if has_specifics {
        score += 0.2;
    }

    // Factor 3: Contains code or structured data
    let has_code =
        value.contains('{') || value.contains('(') || value.contains('[') || value.contains('`');
    if has_code {
        score += 0.15;
    }

    // Factor 4: Length — very short is suspect, medium is ideal, very long is verbose
    score += match word_count {
        0..=2 => 0.0,
        3..=5 => 0.1,
        6..=99 => 0.2,
        _ => 0.15,
    };

    // Factor 5: Filler phrases are a strong negative signal.
    // Typographic apostrophes (U+2019) are folded to ASCII so "I’ll help" is caught too.
    let lowered = value.to_lowercase().replace('\u{2019}', "'");
    let filler_hits = FILLER_PHRASES
        .iter()
        .filter(|&&phrase| contains_whole_phrase(&lowered, phrase))
        .count();
    if filler_hits > 0 {
        score -= 0.15 * filler_hits as f64;
    } else {
        score += 0.05;
    }

    score.clamp(0.0, 1.0)
}
75
76/// Returns true if the value has enough information density to store.
77pub fn should_store(value: &str) -> bool {
78    information_score(value) >= ENTROPY_THRESHOLD
79}
80
81/// Returns true if the value has enough information density for the given threshold.
82pub fn should_store_with_threshold(value: &str, threshold: f64) -> bool {
83    information_score(value) >= threshold
84}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// A dense sentence with a path and a call expression clears the default gate.
    #[test]
    fn high_info_content_passes() {
        let value = "The auth module uses JWT tokens stored in HttpOnly cookies. \
                     See src/auth/middleware.rs::validate_token() for the verification flow.";
        let score = information_score(value);
        assert!(should_store(value));
        assert!(score > 0.6);
    }

    /// Pure conversational filler must not be stored.
    #[test]
    fn low_info_filler_rejected() {
        let fillers = [
            "Sure thing, I'll help you with that",
            "Of course, let me take a look",
        ];
        for filler in fillers {
            assert!(!should_store(filler));
        }
    }

    /// The empty string scores exactly zero and is rejected.
    #[test]
    fn empty_string_rejected() {
        let empty = "";
        assert!(!should_store(empty));
        assert_eq!(information_score(empty), 0.0);
    }

    /// Short text is fine as long as it carries specifics.
    #[test]
    fn terse_but_specific_passes() {
        let terse = "use bun, not npm. config in package.json";
        assert!(should_store(terse));
    }

    /// A bare shell command is worth keeping.
    #[test]
    fn code_snippet_passes() {
        let command = "cargo test -p memory-core -- search";
        assert!(should_store(command));
    }

    /// One lone word is below the default gate.
    #[test]
    fn single_word_rejected() {
        let lone = "hello";
        assert!(!should_store(lone));
    }

    /// Even text that triggers every positive factor stays within the upper bound.
    #[test]
    fn score_capped_at_one() {
        let value = "src/store/memory.rs::save() handles upsert with blake3 dedup. \
                     See also store/dedup.rs for normalize() and hash_content().";
        let score = information_score(value);
        assert!(score <= 1.0);
    }

    /// A cutoff just below the score accepts; one just above rejects.
    #[test]
    fn custom_threshold() {
        let value = "test value with some content";
        let score = information_score(value);
        let (just_below, just_above) = (score - 0.01, score + 0.01);
        assert!(should_store_with_threshold(value, just_below));
        assert!(!should_store_with_threshold(value, just_above));
    }
}