memory_core/autonomous/
compression.rs1use std::collections::HashSet;
2
/// Minimum [`information_score`] a value needs for [`should_store`] to accept it.
const ENTROPY_THRESHOLD: f64 = 0.35;

/// Conversational filler phrases; every one found in a value lowers its score.
///
/// Entries must be lowercase because they are compared against the lowercased input.
const FILLER_PHRASES: &[&str] = &[
    "i'll help",
    "let me",
    "sure thing",
    "of course",
    "no problem",
    "here's what",
    "as you can see",
    "happy to help",
    "certainly",
    "absolutely",
];

/// Reports whether `phrase` occurs in `haystack` as a standalone phrase.
///
/// An occurrence only counts when it is not directly adjacent to a letter or
/// digit on either side, so `"let me"` is found in `"ok, let me check"` but not
/// in `"bullet metrics"`. An empty `phrase` never matches.
fn contains_phrase(haystack: &str, phrase: &str) -> bool {
    if phrase.is_empty() {
        return false;
    }

    let mut search_from = 0;
    while let Some(offset) = haystack[search_from..].find(phrase) {
        let start = search_from + offset;
        let end = start + phrase.len();

        let free_before = haystack[..start]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric());
        let free_after = haystack[end..]
            .chars()
            .next()
            .map_or(true, |c| !c.is_alphanumeric());
        if free_before && free_after {
            return true;
        }

        // Resume one character past this candidate's start so that an
        // overlapping occurrence is still examined.
        search_from = start + haystack[start..].chars().next().map_or(1, char::len_utf8);
    }
    false
}

/// Scores how much information `value` appears to carry, in `0.0..=1.0`.
///
/// The score is a sum of independent heuristics:
///
/// * up to `0.25` for the ratio of distinct (case-insensitive) whitespace-separated
///   words to total words;
/// * `0.2` if the text contains `/`, `::`, `.` or `_`;
/// * `0.15` if the text contains `{`, `(`, `[` or a backtick;
/// * a length bonus of `0.0` (fewer than 3 words), `0.1` (3-5), `0.2` (6-99)
///   or `0.15` (100 or more);
/// * `0.05` if no entry of `FILLER_PHRASES` is present, otherwise a penalty of
///   `0.15` per distinct filler phrase found.
///
/// Filler phrases are matched case-insensitively and only as standalone phrases,
/// so words that merely contain one (for example "uncertainly") are not penalized.
///
/// Returns `0.0` for empty or whitespace-only input. The result is clamped to
/// `0.0..=1.0`.
pub fn information_score(value: &str) -> f64 {
    // Lowercase once; both the uniqueness ratio and the filler check use it.
    let lower = value.to_lowercase();

    let mut distinct_words: HashSet<&str> = HashSet::new();
    let mut total_words = 0_usize;
    for word in lower.split_whitespace() {
        total_words += 1;
        distinct_words.insert(word);
    }
    if total_words == 0 {
        return 0.0;
    }
    let word_count = total_words as f64;

    let mut score = 0.0;

    let uniqueness = distinct_words.len() as f64 / word_count;
    score += uniqueness * 0.25;

    let has_specifics =
        value.contains('/') || value.contains("::") || value.contains('.') || value.contains('_');
    if has_specifics {
        score += 0.2;
    }

    let has_code =
        value.contains('{') || value.contains('(') || value.contains('[') || value.contains('`');
    if has_code {
        score += 0.15;
    }

    // Very long values earn slightly less than medium-length ones.
    score += match total_words {
        0..=2 => 0.0,
        3..=5 => 0.1,
        6..=99 => 0.2,
        _ => 0.15,
    };

    let filler_count = FILLER_PHRASES
        .iter()
        .filter(|&&phrase| contains_phrase(&lower, phrase))
        .count();
    if filler_count > 0 {
        score -= 0.15 * filler_count as f64;
    } else {
        score += 0.05;
    }

    // Stacked filler penalties can push the sum below zero.
    score.clamp(0.0, 1.0)
}
75
76pub fn should_store(value: &str) -> bool {
78 information_score(value) >= ENTROPY_THRESHOLD
79}
80
81pub fn should_store_with_threshold(value: &str, threshold: f64) -> bool {
83 information_score(value) >= threshold
84}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Prose dense with paths and symbols clears the default threshold with room to spare.
    #[test]
    fn high_info_content_passes() {
        let note = concat!(
            "The auth module uses JWT tokens stored in HttpOnly cookies. ",
            "See src/auth/middleware.rs::validate_token() for the verification flow.",
        );
        assert!(should_store(note));
        let score = information_score(note);
        assert!(score > 0.6);
    }

    /// Pure conversational filler must not be stored.
    #[test]
    fn low_info_filler_rejected() {
        let fillers = [
            "Sure thing, I'll help you with that",
            "Of course, let me take a look",
        ];
        for filler in fillers.iter() {
            assert!(!should_store(filler));
        }
    }

    /// Empty input scores exactly zero and is rejected.
    #[test]
    fn empty_string_rejected() {
        let blank = "";
        assert!(!should_store(blank));
        assert_eq!(information_score(blank), 0.0);
    }

    /// Short notes are still kept when they carry concrete details.
    #[test]
    fn terse_but_specific_passes() {
        let accepted = should_store("use bun, not npm. config in package.json");
        assert!(accepted);
    }

    /// A bare command line is informative enough to keep.
    #[test]
    fn code_snippet_passes() {
        let accepted = should_store("cargo test -p memory-core -- search");
        assert!(accepted);
    }

    /// One ordinary word is never enough.
    #[test]
    fn single_word_rejected() {
        let accepted = should_store("hello");
        assert!(!accepted);
    }

    /// Even content that triggers every bonus stays within the upper bound.
    #[test]
    fn score_capped_at_one() {
        let note = concat!(
            "src/store/memory.rs::save() handles upsert with blake3 dedup. ",
            "See also store/dedup.rs for normalize() and hash_content().",
        );
        let score = information_score(note);
        assert!(score <= 1.0);
    }

    /// The explicit-threshold variant accepts just below the score and rejects just above it.
    #[test]
    fn custom_threshold() {
        let sample = "test value with some content";
        let actual = information_score(sample);
        let just_below = actual - 0.01;
        let just_above = actual + 0.01;
        assert!(should_store_with_threshold(sample, just_below));
        assert!(!should_store_with_threshold(sample, just_above));
    }
}