memscope_rs/core/
fast_data_deduplicator.rs1use crate::core::types::TrackingResult;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::sync::Arc;
9
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
12pub struct SimpleDeduplicatedString {
13 pub hash: u64,
14 pub length: usize,
15 pub ref_count: u32,
16}
17
18#[derive(Debug, Default, Clone, Serialize, Deserialize)]
20pub struct SimpleDeduplicationStats {
21 pub total_operations: u64,
22 pub cache_hit_rate: f64,
23 pub memory_saved_bytes: u64,
24}
25
26pub struct SimpleDataDeduplicator {
28 strings: HashMap<u64, Arc<String>>,
29 stats: SimpleDeduplicationStats,
30}
31
32impl SimpleDataDeduplicator {
33 pub fn new() -> Self {
34 Self {
35 strings: HashMap::new(),
36 stats: SimpleDeduplicationStats::default(),
37 }
38 }
39
40 pub fn deduplicate_string(&mut self, input: &str) -> TrackingResult<SimpleDeduplicatedString> {
41 let hash = self.calculate_hash(input);
42 self.stats.total_operations += 1;
43
44 if self.strings.contains_key(&hash) {
45 self.stats.cache_hit_rate = 1.0;
46 return Ok(SimpleDeduplicatedString {
47 hash,
48 length: input.len(),
49 ref_count: 2,
50 });
51 }
52
53 self.strings.insert(hash, Arc::new(input.to_string()));
54 Ok(SimpleDeduplicatedString {
55 hash,
56 length: input.len(),
57 ref_count: 1,
58 })
59 }
60
61 pub fn get_string(&self, dedup_ref: &SimpleDeduplicatedString) -> TrackingResult<Arc<String>> {
62 match self.strings.get(&dedup_ref.hash) {
63 Some(s) => Ok(Arc::clone(s)),
64 None => Ok(Arc::new("not found".to_string())),
65 }
66 }
67
68 pub fn get_stats(&self) -> TrackingResult<SimpleDeduplicationStats> {
69 Ok(self.stats.clone())
70 }
71
72 fn calculate_hash(&self, input: &str) -> u64 {
73 use std::collections::hash_map::DefaultHasher;
74 use std::hash::{Hash, Hasher};
75 let mut hasher = DefaultHasher::new();
76 input.hash(&mut hasher);
77 hasher.finish()
78 }
79}
80
81impl Default for SimpleDataDeduplicator {
82 fn default() -> Self {
83 Self::new()
84 }
85}
86
87static mut GLOBAL_SIMPLE_DEDUPLICATOR: Option<SimpleDataDeduplicator> = None;
89static INIT: std::sync::Once = std::sync::Once::new();
90
91pub fn get_global_simple_data_deduplicator() -> &'static mut SimpleDataDeduplicator {
92 #[allow(static_mut_refs)]
93 unsafe {
94 INIT.call_once(|| {
95 GLOBAL_SIMPLE_DEDUPLICATOR = Some(SimpleDataDeduplicator::new());
96 });
97 GLOBAL_SIMPLE_DEDUPLICATOR.as_mut().unwrap()
98 }
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    // A new deduplicator starts with all counters at zero.
    #[test]
    fn test_new_deduplicator() {
        let stats = SimpleDataDeduplicator::new()
            .get_stats()
            .expect("Failed to get stats");

        assert_eq!(stats.total_operations, 0);
        assert_eq!(stats.cache_hit_rate, 0.0);
        assert_eq!(stats.memory_saved_bytes, 0);
    }

    // The first sighting of a string reports its byte length and ref_count 1.
    #[test]
    fn test_deduplicate_string_first_time() {
        let input = "test string for deduplication";
        let mut deduplicator = SimpleDataDeduplicator::new();

        let handle = deduplicator
            .deduplicate_string(input)
            .expect("Failed to deduplicate string");

        assert_eq!(handle.length, input.len());
        assert_eq!(handle.ref_count, 1);
        assert_ne!(handle.hash, 0);

        let stats = deduplicator.get_stats().expect("Failed to get stats");
        assert_eq!(stats.total_operations, 1);
    }

    // A repeated string maps to the same hash and is reported as a hit.
    #[test]
    fn test_deduplicate_string_duplicate() {
        let input = "duplicate test string";
        let mut deduplicator = SimpleDataDeduplicator::new();

        let first = deduplicator
            .deduplicate_string(input)
            .expect("Failed to deduplicate string first time");
        let second = deduplicator
            .deduplicate_string(input)
            .expect("Failed to deduplicate string second time");

        assert_eq!(first.hash, second.hash);
        assert_eq!(first.length, second.length);
        assert_eq!(second.ref_count, 2);

        let stats = deduplicator.get_stats().expect("Failed to get stats");
        assert_eq!(stats.total_operations, 2);
        assert_eq!(stats.cache_hit_rate, 1.0);
    }

    // A stored string can be fetched back through its handle.
    #[test]
    fn test_get_string_existing() {
        let input = "retrievable string";
        let mut deduplicator = SimpleDataDeduplicator::new();

        let handle = deduplicator
            .deduplicate_string(input)
            .expect("Failed to deduplicate string");
        let fetched = deduplicator
            .get_string(&handle)
            .expect("Failed to retrieve string");

        assert_eq!(fetched.as_str(), input);
    }

    // Looking up an unknown hash yields the "not found" placeholder.
    #[test]
    fn test_get_string_non_existing() {
        let deduplicator = SimpleDataDeduplicator::new();
        let unknown = SimpleDeduplicatedString {
            hash: 999999,
            length: 10,
            ref_count: 1,
        };

        let fetched = deduplicator
            .get_string(&unknown)
            .expect("Failed to retrieve string");

        assert_eq!(fetched.as_str(), "not found");
    }

    // Hashing the same text twice gives the same value.
    #[test]
    fn test_calculate_hash_consistency() {
        let input = "consistent hash test";
        let deduplicator = SimpleDataDeduplicator::new();

        let first = deduplicator.calculate_hash(input);
        let second = deduplicator.calculate_hash(input);

        assert_eq!(first, second);
        assert_ne!(first, 0);
    }

    // Different texts hash to different values.
    #[test]
    fn test_calculate_hash_different_strings() {
        let deduplicator = SimpleDataDeduplicator::new();

        let one = deduplicator.calculate_hash("string one");
        let two = deduplicator.calculate_hash("string two");

        assert_ne!(one, two);
    }

    // `Default` produces the same zeroed state as `new`.
    #[test]
    fn test_default_implementation() {
        let stats = SimpleDataDeduplicator::default()
            .get_stats()
            .expect("Failed to get stats");

        assert_eq!(stats.total_operations, 0);
        assert_eq!(stats.cache_hit_rate, 0.0);
        assert_eq!(stats.memory_saved_bytes, 0);
    }

    // Interleaved repeats: equal texts share a hash, distinct texts do not.
    #[test]
    fn test_multiple_string_deduplication() {
        let mut deduplicator = SimpleDataDeduplicator::new();
        let inputs = [
            "first string",
            "second string",
            "first string",
            "third string",
            "second string",
            "first string",
        ];

        let handles: Vec<SimpleDeduplicatedString> = inputs
            .iter()
            .map(|text| {
                deduplicator
                    .deduplicate_string(text)
                    .expect("Failed to deduplicate string")
            })
            .collect();

        assert_eq!(handles[0].hash, handles[2].hash);
        assert_eq!(handles[0].hash, handles[5].hash);
        assert_eq!(handles[1].hash, handles[4].hash);

        assert_ne!(handles[0].hash, handles[1].hash);
        assert_ne!(handles[0].hash, handles[3].hash);
        assert_ne!(handles[1].hash, handles[3].hash);

        let stats = deduplicator.get_stats().expect("Failed to get stats");
        assert_eq!(stats.total_operations, 6);
    }

    // The empty string is accepted like any other input.
    #[test]
    fn test_empty_string_deduplication() {
        let mut deduplicator = SimpleDataDeduplicator::new();

        let handle = deduplicator
            .deduplicate_string("")
            .expect("Failed to deduplicate empty string");

        assert_eq!(handle.length, 0);
        assert_eq!(handle.ref_count, 1);
        assert_ne!(handle.hash, 0);
    }

    // A 10 000-byte string round-trips and is recognised on the second call.
    #[test]
    fn test_long_string_deduplication() {
        let input = "a".repeat(10000);
        let mut deduplicator = SimpleDataDeduplicator::new();

        let first = deduplicator
            .deduplicate_string(&input)
            .expect("Failed to deduplicate long string");
        assert_eq!(first.length, 10000);
        assert_eq!(first.ref_count, 1);

        let second = deduplicator
            .deduplicate_string(&input)
            .expect("Failed to deduplicate long string again");
        assert_eq!(first.hash, second.hash);
        assert_eq!(second.ref_count, 2);
    }

    // Length is measured in bytes, and non-ASCII text is stored intact.
    #[test]
    fn test_unicode_string_deduplication() {
        let input = "Hello World 🌍";
        let mut deduplicator = SimpleDataDeduplicator::new();

        let handle = deduplicator
            .deduplicate_string(input)
            .expect("Failed to deduplicate unicode string");
        assert_eq!(handle.length, input.len());

        let fetched = deduplicator
            .get_string(&handle)
            .expect("Failed to retrieve unicode string");
        assert_eq!(fetched.as_str(), input);
    }

    // Three unique inputs followed by two repeats: five operations, hit flag set.
    #[test]
    fn test_stats_accuracy() {
        let mut deduplicator = SimpleDataDeduplicator::new();
        let inputs = ["unique1", "unique2", "unique3", "unique1", "unique2"];

        for text in inputs.iter() {
            deduplicator
                .deduplicate_string(text)
                .expect("Failed to deduplicate");
        }

        let stats = deduplicator.get_stats().expect("Failed to get stats");
        assert_eq!(stats.total_operations, 5);
        assert_eq!(stats.cache_hit_rate, 1.0);
    }
}