1use crate::analysis::unsafe_ffi_tracker::StackFrame;
11use crate::core::safe_operations::SafeLock;
12use crate::core::types::TrackingResult;
13use dashmap::DashMap;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::hash::{Hash, Hasher};
17use std::sync::{Arc, Mutex};
18
/// Configuration for [`ComprehensiveDataDeduplicator`].
#[derive(Debug, Clone)]
pub struct DeduplicationConfig {
    /// Deduplicate repeated strings.
    pub enable_string_dedup: bool,
    /// Deduplicate repeated stack traces.
    pub enable_stack_dedup: bool,
    /// Deduplicate repeated metadata maps.
    pub enable_metadata_dedup: bool,
    /// Initial capacity hint for each dedup cache (passed to `DashMap::with_capacity`).
    pub max_cache_size: usize,
    /// Route payloads larger than `compression_threshold` through compressed storage.
    pub enable_compression: bool,
    /// Size in bytes above which payloads are compressed.
    pub compression_threshold: usize,
    /// Collect deduplication statistics.
    pub enable_stats: bool,
    /// Fill ratio intended to trigger cleanup (0.0..=1.0).
    /// NOTE(review): not referenced by any code visible in this file — confirm usage.
    pub cleanup_threshold: f64,
}
39
40impl Default for DeduplicationConfig {
41 fn default() -> Self {
42 Self {
43 enable_string_dedup: true,
44 enable_stack_dedup: true,
45 enable_metadata_dedup: true,
46 max_cache_size: 50000,
47 enable_compression: true,
48 compression_threshold: 1024,
49 enable_stats: true,
50 cleanup_threshold: 0.8,
51 }
52 }
53}
54
/// Counters reported by [`ComprehensiveDataDeduplicator::get_stats`].
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct DeduplicationStats {
    /// String cache hits (repeat deduplications).
    pub strings_deduplicated: u64,
    /// Stack-trace cache hits.
    pub stack_traces_deduplicated: u64,
    /// Metadata-map cache hits.
    pub metadata_deduplicated: u64,
    /// Rough estimate of bytes saved; only container header sizes are counted.
    pub memory_saved_bytes: u64,
    /// Compression ratio.
    /// NOTE(review): never written by code visible in this file — confirm.
    pub compression_ratio: f64,
    /// Hits divided by total operations; recomputed in `get_stats`.
    pub cache_hit_rate: f64,
    /// Total dedup operations counted.
    pub total_operations: u64,
    /// Cleanup passes performed.
    /// NOTE(review): never written by code visible in this file — confirm.
    pub cleanup_operations: u64,
}
67
/// Lightweight handle to a deduplicated string; resolve it with
/// [`ComprehensiveDataDeduplicator::get_string`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DeduplicatedString {
    /// Hash of the original string (key into the storage maps).
    pub hash: u64,
    /// Byte length of the original string.
    pub length: usize,
    /// How many times this string has been interned.
    pub ref_count: u64,
}
78
/// Lightweight handle to a deduplicated stack trace; resolve it with
/// [`ComprehensiveDataDeduplicator::get_stack_trace`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DeduplicatedStackTrace {
    /// Hash of the frame sequence (key into the storage maps).
    pub hash: u64,
    /// Number of frames in the original trace.
    pub frame_count: usize,
    /// How many times this trace has been interned.
    pub ref_count: u64,
}
89
/// Lightweight handle to a deduplicated metadata map; resolve it with
/// [`ComprehensiveDataDeduplicator::get_metadata`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DeduplicatedMetadata {
    /// Order-independent hash of the map's key/value pairs.
    pub hash: u64,
    /// Number of entries in the original map.
    pub entry_count: usize,
    /// How many times this map has been interned.
    pub ref_count: u64,
}
100
/// Deduplicates strings, stack traces, and metadata maps, storing each unique
/// payload once (optionally compressed) behind its hash.
pub struct ComprehensiveDataDeduplicator {
    // Uncompressed unique strings, keyed by hash.
    string_storage: DashMap<u64, Arc<String>>,
    // Reference descriptors for interned strings.
    string_refs: DashMap<u64, DeduplicatedString>,
    // Uncompressed unique stack traces, keyed by hash.
    stack_storage: DashMap<u64, Arc<Vec<StackFrame>>>,
    // Reference descriptors for interned stack traces.
    stack_refs: DashMap<u64, DeduplicatedStackTrace>,
    // Uncompressed unique metadata maps, keyed by hash.
    metadata_storage: DashMap<u64, Arc<HashMap<String, String>>>,
    // Reference descriptors for interned metadata maps.
    metadata_refs: DashMap<u64, DeduplicatedMetadata>,
    // Compressed payloads; all three kinds share this map, keyed by hash.
    compressed_storage: DashMap<u64, Arc<Vec<u8>>>,
    // Shared statistics, updated only when `config.enable_stats` is set.
    stats: Arc<Mutex<DeduplicationStats>>,
    // Behavior configuration fixed at construction.
    config: DeduplicationConfig,
}
122
123impl ComprehensiveDataDeduplicator {
124 pub fn new(config: DeduplicationConfig) -> Self {
126 tracing::info!("🔄 Initializing Comprehensive Data Deduplicator");
127 tracing::info!(" • String dedup: {}", config.enable_string_dedup);
128 tracing::info!(" • Stack dedup: {}", config.enable_stack_dedup);
129 tracing::info!(" • Metadata dedup: {}", config.enable_metadata_dedup);
130 tracing::info!(" • Compression: {}", config.enable_compression);
131
132 Self {
133 string_storage: DashMap::with_capacity(config.max_cache_size),
134 string_refs: DashMap::with_capacity(config.max_cache_size),
135 stack_storage: DashMap::with_capacity(config.max_cache_size),
136 stack_refs: DashMap::with_capacity(config.max_cache_size),
137 metadata_storage: DashMap::with_capacity(config.max_cache_size),
138 metadata_refs: DashMap::with_capacity(config.max_cache_size),
139 compressed_storage: DashMap::new(),
140 stats: Arc::new(Mutex::new(DeduplicationStats::default())),
141 config,
142 }
143 }
144
145 pub fn deduplicate_string(&self, input: &str) -> TrackingResult<DeduplicatedString> {
147 if !self.config.enable_string_dedup {
148 return Ok(DeduplicatedString {
149 hash: self.calculate_string_hash(input),
150 length: input.len(),
151 ref_count: 1,
152 });
153 }
154
155 let hash = self.calculate_string_hash(input);
156
157 match self.string_refs.entry(hash) {
159 dashmap::mapref::entry::Entry::Occupied(mut entry) => {
160 let updated_ref = {
162 let current = entry.get();
163 DeduplicatedString {
164 hash: current.hash,
165 length: current.length,
166 ref_count: current.ref_count + 1,
167 }
168 };
169 entry.insert(updated_ref);
170
171 self.update_stats_string_dedup();
172 tracing::debug!("🔄 String deduplicated: hash={}", hash);
173 Ok(updated_ref)
176 }
177 dashmap::mapref::entry::Entry::Vacant(entry) => {
178 let dedup_ref = DeduplicatedString {
180 hash,
181 length: input.len(),
182 ref_count: 1,
183 };
184
185 if self.config.enable_compression && input.len() > self.config.compression_threshold
187 {
188 let compressed = self.compress_data(input.as_bytes())?;
189 self.compressed_storage.insert(hash, Arc::new(compressed));
190 } else {
191 let arc_string = Arc::new(input.to_string());
192 self.string_storage.insert(hash, arc_string);
193 }
194
195 entry.insert(dedup_ref);
197 self.update_stats_total_operations();
198
199 tracing::debug!(
200 "🔄 New string stored: hash={}, length={}",
201 hash,
202 input.len()
203 );
204 Ok(dedup_ref)
205 }
206 }
207 }
208
209 pub fn get_string(&self, dedup_ref: &DeduplicatedString) -> TrackingResult<Arc<String>> {
211 let hash = dedup_ref.hash;
212
213 if let Some(string) = self.string_storage.get(&hash) {
215 return Ok(Arc::clone(string.value()));
216 }
217
218 if let Some(compressed) = self.compressed_storage.get(&hash) {
220 let decompressed = self.decompress_data(&compressed)?;
221 let string = String::from_utf8(decompressed).map_err(|e| {
222 crate::core::types::TrackingError::DataError(format!(
223 "Failed to decode decompressed string: {e}"
224 ))
225 })?;
226 return Ok(Arc::new(string));
227 }
228
229 Err(crate::core::types::TrackingError::DataError(format!(
230 "String with hash {hash} not found"
231 )))
232 }
233
234 pub fn deduplicate_stack_trace(
236 &self,
237 frames: &[StackFrame],
238 ) -> TrackingResult<DeduplicatedStackTrace> {
239 if !self.config.enable_stack_dedup {
240 return Ok(DeduplicatedStackTrace {
241 hash: self.calculate_stack_hash(frames),
242 frame_count: frames.len(),
243 ref_count: 1,
244 });
245 }
246
247 let hash = self.calculate_stack_hash(frames);
248
249 match self.stack_refs.entry(hash) {
251 dashmap::mapref::entry::Entry::Occupied(mut entry) => {
252 let updated_ref = {
254 let current = entry.get();
255 DeduplicatedStackTrace {
256 hash: current.hash,
257 frame_count: current.frame_count,
258 ref_count: current.ref_count + 1,
259 }
260 };
261 entry.insert(updated_ref);
262
263 self.update_stats_stack_dedup();
264 tracing::debug!("🔄 Stack trace deduplicated: hash={}", hash);
265 Ok(updated_ref)
268 }
269 dashmap::mapref::entry::Entry::Vacant(entry) => {
270 let dedup_ref = DeduplicatedStackTrace {
272 hash,
273 frame_count: frames.len(),
274 ref_count: 1,
275 };
276
277 let serialized_size = std::mem::size_of_val(frames);
279 if self.config.enable_compression
280 && serialized_size > self.config.compression_threshold
281 {
282 let serialized = self.serialize_stack_frames(frames)?;
283 let compressed = self.compress_data(&serialized)?;
284 self.compressed_storage.insert(hash, Arc::new(compressed));
285 } else {
286 let arc_frames = Arc::new(frames.to_vec());
287 self.stack_storage.insert(hash, arc_frames);
288 }
289
290 entry.insert(dedup_ref);
292 self.update_stats_total_operations();
293
294 tracing::debug!(
295 "🔄 New stack trace stored: hash={}, frames={}",
296 hash,
297 frames.len()
298 );
299 Ok(dedup_ref)
300 }
301 }
302 }
303
304 pub fn get_stack_trace(
306 &self,
307 dedup_ref: &DeduplicatedStackTrace,
308 ) -> TrackingResult<Arc<Vec<StackFrame>>> {
309 let hash = dedup_ref.hash;
310
311 if let Some(frames) = self.stack_storage.get(&hash) {
313 return Ok(Arc::clone(frames.value()));
314 }
315
316 if let Some(compressed) = self.compressed_storage.get(&hash) {
318 let decompressed = self.decompress_data(&compressed)?;
319 let frames = self.deserialize_stack_frames(&decompressed)?;
320 return Ok(Arc::new(frames));
321 }
322
323 Err(crate::core::types::TrackingError::DataError(format!(
324 "Stack trace with hash {hash} not found"
325 )))
326 }
327
328 pub fn deduplicate_metadata(
330 &self,
331 metadata: &HashMap<String, String>,
332 ) -> TrackingResult<DeduplicatedMetadata> {
333 if !self.config.enable_metadata_dedup {
334 return Ok(DeduplicatedMetadata {
335 hash: self.calculate_metadata_hash(metadata),
336 entry_count: metadata.len(),
337 ref_count: 1,
338 });
339 }
340
341 let hash = self.calculate_metadata_hash(metadata);
342
343 match self.metadata_refs.entry(hash) {
345 dashmap::mapref::entry::Entry::Occupied(mut entry) => {
346 let updated_ref = {
348 let current = entry.get();
349 DeduplicatedMetadata {
350 hash: current.hash,
351 entry_count: current.entry_count,
352 ref_count: current.ref_count + 1,
353 }
354 };
355 entry.insert(updated_ref);
356
357 self.update_stats_metadata_dedup();
358 tracing::debug!("🔄 Metadata deduplicated: hash={}", hash);
359 Ok(updated_ref)
362 }
363 dashmap::mapref::entry::Entry::Vacant(entry) => {
364 let dedup_ref = DeduplicatedMetadata {
366 hash,
367 entry_count: metadata.len(),
368 ref_count: 1,
369 };
370
371 let serialized_size = metadata
373 .iter()
374 .map(|(k, v)| k.len() + v.len())
375 .sum::<usize>();
376
377 if self.config.enable_compression
378 && serialized_size > self.config.compression_threshold
379 {
380 let serialized = self.serialize_metadata(metadata)?;
381 let compressed = self.compress_data(&serialized)?;
382 self.compressed_storage.insert(hash, Arc::new(compressed));
383 } else {
384 let arc_metadata = Arc::new(metadata.to_owned());
385 self.metadata_storage.insert(hash, arc_metadata);
386 }
387
388 entry.insert(dedup_ref);
390 self.update_stats_total_operations();
391
392 tracing::debug!(
393 "🔄 New metadata stored: hash={}, entries={}",
394 hash,
395 metadata.len()
396 );
397 Ok(dedup_ref)
398 }
399 }
400 }
401
402 pub fn get_metadata(
404 &self,
405 dedup_ref: &DeduplicatedMetadata,
406 ) -> TrackingResult<Arc<HashMap<String, String>>> {
407 let hash = dedup_ref.hash;
408
409 if let Some(metadata) = self.metadata_storage.get(&hash) {
411 return Ok(Arc::clone(metadata.value()));
412 }
413
414 if let Some(compressed) = self.compressed_storage.get(&hash) {
416 let decompressed = self.decompress_data(&compressed)?;
417 let metadata = self.deserialize_metadata(&decompressed)?;
418 return Ok(Arc::new(metadata));
419 }
420
421 Err(crate::core::types::TrackingError::DataError(format!(
422 "Metadata with hash {hash} not found"
423 )))
424 }
425
426 pub fn get_stats(&self) -> TrackingResult<DeduplicationStats> {
428 match self.stats.safe_lock() {
429 Ok(stats) => {
430 let mut result = stats.clone();
431
432 if result.total_operations > 0 {
434 let total_dedups = result.strings_deduplicated
435 + result.stack_traces_deduplicated
436 + result.metadata_deduplicated;
437 result.cache_hit_rate = total_dedups as f64 / result.total_operations as f64;
438 }
439
440 Ok(result)
441 }
442 Err(e) => {
443 tracing::warn!("Failed to get deduplication stats: {}", e);
444 Ok(DeduplicationStats::default())
445 }
446 }
447 }
448
449 pub fn clear_all(&self) {
451 self.string_storage.clear();
452 self.string_refs.clear();
453 self.stack_storage.clear();
454 self.stack_refs.clear();
455 self.metadata_storage.clear();
456 self.metadata_refs.clear();
457 self.compressed_storage.clear();
458
459 match self.stats.safe_lock() {
460 Ok(mut stats) => {
461 *stats = DeduplicationStats::default();
462 }
463 Err(e) => {
464 tracing::warn!("Failed to reset stats during clear: {}", e);
465 }
466 }
467
468 tracing::info!("🔄 Cleared all deduplicated data");
469 }
470
471 fn calculate_string_hash(&self, input: &str) -> u64 {
473 use std::collections::hash_map::DefaultHasher;
474 let mut hasher = DefaultHasher::new();
475 input.hash(&mut hasher);
476 hasher.finish()
477 }
478
479 fn calculate_stack_hash(&self, frames: &[StackFrame]) -> u64 {
481 use std::collections::hash_map::DefaultHasher;
482 let mut hasher = DefaultHasher::new();
483 for frame in frames {
484 frame.function_name.hash(&mut hasher);
485 frame.file_name.hash(&mut hasher);
486 frame.line_number.hash(&mut hasher);
487 }
488 hasher.finish()
489 }
490
491 fn calculate_metadata_hash(&self, metadata: &HashMap<String, String>) -> u64 {
493 use std::collections::hash_map::DefaultHasher;
494 let mut hasher = DefaultHasher::new();
495
496 let mut sorted_pairs: Vec<_> = metadata.iter().collect();
498 sorted_pairs.sort_by_key(|(k, _)| *k);
499
500 for (key, value) in sorted_pairs {
501 key.hash(&mut hasher);
502 value.hash(&mut hasher);
503 }
504 hasher.finish()
505 }
506
507 fn compress_data(&self, data: &[u8]) -> TrackingResult<Vec<u8>> {
509 let mut compressed = Vec::with_capacity(data.len() / 2);
511 compressed.extend_from_slice(b"COMPRESSED:");
512 compressed.extend_from_slice(data);
513 Ok(compressed)
514 }
515
516 fn decompress_data(&self, compressed: &[u8]) -> TrackingResult<Vec<u8>> {
518 if compressed.starts_with(b"COMPRESSED:") {
520 Ok(compressed[11..].to_vec())
521 } else {
522 Err(crate::core::types::TrackingError::DataError(
523 "Invalid compressed data format".to_string(),
524 ))
525 }
526 }
527
    /// Serializes stack frames for compressed storage.
    ///
    /// NOTE(review): placeholder — emits the `Debug` format, which
    /// `deserialize_stack_frames` cannot parse back; compressed stack traces
    /// therefore round-trip to an empty vector.
    fn serialize_stack_frames(&self, frames: &[StackFrame]) -> TrackingResult<Vec<u8>> {
        let serialized = format!("{frames:?}");
        Ok(serialized.into_bytes())
    }
534
    /// Deserializes stack frames from compressed storage.
    ///
    /// NOTE(review): placeholder — only validates that the payload is UTF-8
    /// and always returns an empty vector, so compressed stack traces are
    /// effectively lost. Existing tests pin this behavior; fix together with
    /// `serialize_stack_frames` and the tests.
    fn deserialize_stack_frames(&self, data: &[u8]) -> TrackingResult<Vec<StackFrame>> {
        let _serialized = String::from_utf8(data.to_vec()).map_err(|e| {
            crate::core::types::TrackingError::DataError(format!(
                "Failed to decode serialized stack frames: {e}"
            ))
        })?;

        Ok(Vec::new())
    }
547
    /// Serializes a metadata map for compressed storage.
    ///
    /// NOTE(review): placeholder — emits the `Debug` format, which
    /// `deserialize_metadata` cannot parse back; compressed metadata
    /// therefore round-trips to an empty map.
    fn serialize_metadata(&self, metadata: &HashMap<String, String>) -> TrackingResult<Vec<u8>> {
        let serialized = format!("{metadata:?}");
        Ok(serialized.into_bytes())
    }
554
    /// Deserializes a metadata map from compressed storage.
    ///
    /// NOTE(review): placeholder — only validates that the payload is UTF-8
    /// and always returns an empty map, so compressed metadata is effectively
    /// lost. Existing tests pin this behavior; fix together with
    /// `serialize_metadata` and the tests.
    fn deserialize_metadata(&self, data: &[u8]) -> TrackingResult<HashMap<String, String>> {
        let _serialized = String::from_utf8(data.to_vec()).map_err(|e| {
            crate::core::types::TrackingError::DataError(format!(
                "Failed to decode serialized metadata: {e}"
            ))
        })?;

        Ok(HashMap::new())
    }
567
568 fn update_stats_string_dedup(&self) {
570 if !self.config.enable_stats {
571 return;
572 }
573
574 match self.stats.safe_lock() {
575 Ok(mut stats) => {
576 stats.strings_deduplicated += 1;
577 stats.memory_saved_bytes += std::mem::size_of::<String>() as u64;
578 }
579 Err(e) => {
580 tracing::warn!("Failed to update string dedup stats: {}", e);
581 }
582 }
583 }
584
585 fn update_stats_stack_dedup(&self) {
586 if !self.config.enable_stats {
587 return;
588 }
589
590 match self.stats.safe_lock() {
591 Ok(mut stats) => {
592 stats.stack_traces_deduplicated += 1;
593 stats.memory_saved_bytes += std::mem::size_of::<Vec<StackFrame>>() as u64;
594 }
595 Err(e) => {
596 tracing::warn!("Failed to update stack dedup stats: {}", e);
597 }
598 }
599 }
600
601 fn update_stats_metadata_dedup(&self) {
602 if !self.config.enable_stats {
603 return;
604 }
605
606 match self.stats.safe_lock() {
607 Ok(mut stats) => {
608 stats.metadata_deduplicated += 1;
609 stats.memory_saved_bytes += std::mem::size_of::<HashMap<String, String>>() as u64;
610 }
611 Err(e) => {
612 tracing::warn!("Failed to update metadata dedup stats: {}", e);
613 }
614 }
615 }
616
617 fn update_stats_total_operations(&self) {
618 if !self.config.enable_stats {
619 return;
620 }
621
622 match self.stats.safe_lock() {
623 Ok(mut stats) => {
624 stats.total_operations += 1;
625 }
626 Err(e) => {
627 tracing::warn!("Failed to update total operations stats: {}", e);
628 }
629 }
630 }
631}
632
/// Process-wide deduplicator instance, created lazily on first access.
static GLOBAL_DATA_DEDUPLICATOR: std::sync::OnceLock<Arc<ComprehensiveDataDeduplicator>> =
    std::sync::OnceLock::new();
638
639pub fn get_global_data_deduplicator() -> Arc<ComprehensiveDataDeduplicator> {
641 GLOBAL_DATA_DEDUPLICATOR
642 .get_or_init(|| {
643 Arc::new(ComprehensiveDataDeduplicator::new(
644 DeduplicationConfig::default(),
645 ))
646 })
647 .clone()
648}
649
650pub fn initialize_global_data_deduplicator(
652 config: DeduplicationConfig,
653) -> Arc<ComprehensiveDataDeduplicator> {
654 let deduplicator = Arc::new(ComprehensiveDataDeduplicator::new(config));
655 match GLOBAL_DATA_DEDUPLICATOR.set(deduplicator.clone()) {
656 Ok(_) => tracing::info!("🔄 Global comprehensive data deduplicator initialized"),
657 Err(_) => tracing::warn!("🔄 Global comprehensive data deduplicator already initialized"),
658 }
659 deduplicator
660}
661
662#[cfg(test)]
663mod tests {
664 use super::*;
665 use std::collections::HashMap;
666
    /// Builds a minimal safe `StackFrame` with the given function, file, and
    /// line for use in tests.
    fn create_test_stack_frame(function: &str, file: &str, line: u32) -> StackFrame {
        StackFrame {
            function_name: function.to_string(),
            file_name: Some(file.to_string()),
            line_number: Some(line),
            is_unsafe: false,
        }
    }
676
    /// Builds a three-entry metadata map for use in tests.
    fn create_test_metadata() -> HashMap<String, String> {
        let mut metadata = HashMap::new();
        metadata.insert("key1".to_string(), "value1".to_string());
        metadata.insert("key2".to_string(), "value2".to_string());
        metadata.insert("key3".to_string(), "value3".to_string());
        metadata
    }
685
    #[test]
    fn test_deduplication_config_default() {
        // All features are on by default; thresholds match documented defaults.
        let config = DeduplicationConfig::default();

        assert!(config.enable_string_dedup);
        assert!(config.enable_stack_dedup);
        assert!(config.enable_metadata_dedup);
        assert_eq!(config.max_cache_size, 50000);
        assert!(config.enable_compression);
        assert_eq!(config.compression_threshold, 1024);
        assert!(config.enable_stats);
        assert_eq!(config.cleanup_threshold, 0.8);
    }
699
    #[test]
    fn test_deduplication_config_custom() {
        // A fully custom config preserves every field as given.
        let config = DeduplicationConfig {
            enable_string_dedup: false,
            enable_stack_dedup: true,
            enable_metadata_dedup: false,
            max_cache_size: 1000,
            enable_compression: false,
            compression_threshold: 2048,
            enable_stats: false,
            cleanup_threshold: 0.5,
        };

        assert!(!config.enable_string_dedup);
        assert!(config.enable_stack_dedup);
        assert!(!config.enable_metadata_dedup);
        assert_eq!(config.max_cache_size, 1000);
        assert!(!config.enable_compression);
        assert_eq!(config.compression_threshold, 2048);
        assert!(!config.enable_stats);
        assert_eq!(config.cleanup_threshold, 0.5);
    }
722
    #[test]
    fn test_deduplication_stats_default() {
        // Default stats start from zero across every counter.
        let stats = DeduplicationStats::default();

        assert_eq!(stats.strings_deduplicated, 0);
        assert_eq!(stats.stack_traces_deduplicated, 0);
        assert_eq!(stats.metadata_deduplicated, 0);
        assert_eq!(stats.memory_saved_bytes, 0);
        assert_eq!(stats.compression_ratio, 0.0);
        assert_eq!(stats.cache_hit_rate, 0.0);
        assert_eq!(stats.total_operations, 0);
        assert_eq!(stats.cleanup_operations, 0);
    }
736
    #[test]
    fn test_comprehensive_data_deduplicator_new() {
        // A new deduplicator starts with every storage map empty.
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        assert_eq!(deduplicator.string_storage.len(), 0);
        assert_eq!(deduplicator.string_refs.len(), 0);
        assert_eq!(deduplicator.stack_storage.len(), 0);
        assert_eq!(deduplicator.stack_refs.len(), 0);
        assert_eq!(deduplicator.metadata_storage.len(), 0);
        assert_eq!(deduplicator.metadata_refs.len(), 0);
        assert_eq!(deduplicator.compressed_storage.len(), 0);
    }
751 #[test]
752 fn test_string_deduplication_enabled() {
753 let config = DeduplicationConfig {
754 enable_stats: false,
755 enable_compression: false,
756 ..Default::default()
757 }; let deduplicator = ComprehensiveDataDeduplicator::new(config);
759
760 let test_string = "Hello, World!";
761
762 let result1 = deduplicator
764 .deduplicate_string(test_string)
765 .expect("Failed to deduplicate string");
766 assert_eq!(result1.length, test_string.len());
767 assert_eq!(result1.ref_count, 1);
768
769 assert_eq!(deduplicator.string_storage.len(), 1);
771 assert_eq!(deduplicator.string_refs.len(), 1);
772
773 let retrieved1 = deduplicator
775 .get_string(&result1)
776 .expect("Failed to get string with result1");
777 assert_eq!(*retrieved1, test_string);
778
779 let result2 = deduplicator
782 .deduplicate_string(test_string)
783 .expect("Failed to deduplicate string");
784 assert_eq!(result2.hash, result1.hash);
785 assert_eq!(result2.ref_count, 2);
786
787 let retrieved2 = deduplicator
789 .get_string(&result2)
790 .expect("Failed to get string with result2");
791 assert_eq!(*retrieved2, test_string);
792
793 for i in 3..=10 {
795 let result = deduplicator
796 .deduplicate_string(test_string)
797 .unwrap_or_else(|_| panic!("Call {i} should succeed without deadlock"));
798 assert_eq!(result.hash, result1.hash);
799 assert_eq!(result.ref_count, i);
800 assert_eq!(result.length, test_string.len());
801 }
802
803 assert_eq!(deduplicator.string_storage.len(), 1);
805 assert_eq!(deduplicator.string_refs.len(), 1);
806 }
807
808 #[test]
809 fn test_string_deduplication_disabled() {
810 let config = DeduplicationConfig {
811 enable_string_dedup: false,
812 ..Default::default()
813 };
814 let deduplicator = ComprehensiveDataDeduplicator::new(config);
815
816 let test_string = "Hello, World!";
817
818 let result1 = deduplicator
820 .deduplicate_string(test_string)
821 .expect("Failed to deduplicate string");
822 assert_eq!(result1.length, test_string.len());
823 assert_eq!(result1.ref_count, 1);
824
825 let result2 = deduplicator
827 .deduplicate_string(test_string)
828 .expect("Failed to deduplicate string");
829 assert_eq!(result2.hash, result1.hash);
830 assert_eq!(result2.ref_count, 1); }
832
833 #[test]
834 fn test_string_compression() {
835 let config = DeduplicationConfig {
836 compression_threshold: 10,
837 ..Default::default()
838 }; let deduplicator = ComprehensiveDataDeduplicator::new(config);
840
841 let large_string = "This is a large string that should be compressed".repeat(10);
842
843 let result = deduplicator
844 .deduplicate_string(&large_string)
845 .expect("Failed to deduplicate string");
846 assert_eq!(result.length, large_string.len());
847
848 let retrieved = deduplicator
850 .get_string(&result)
851 .expect("Failed to get compressed string");
852 assert_eq!(*retrieved, large_string);
853 }
854 #[test]
855 fn test_stack_trace_deduplication_enabled() {
856 let config = DeduplicationConfig {
857 enable_stats: false,
858 enable_compression: false,
859 ..Default::default()
860 }; let deduplicator = ComprehensiveDataDeduplicator::new(config);
862
863 let frames = vec![
864 create_test_stack_frame("main", "main.rs", 10),
865 create_test_stack_frame("foo", "lib.rs", 20),
866 create_test_stack_frame("bar", "lib.rs", 30),
867 ];
868
869 let result1 = deduplicator
871 .deduplicate_stack_trace(&frames)
872 .expect("Failed to deduplicate stack trace");
873 assert_eq!(result1.frame_count, frames.len());
874 assert_eq!(result1.ref_count, 1);
875
876 assert_eq!(deduplicator.stack_storage.len(), 1);
878 assert_eq!(deduplicator.stack_refs.len(), 1);
879
880 let retrieved1 = deduplicator
882 .get_stack_trace(&result1)
883 .expect("Failed to get stack trace with result1");
884 assert_eq!(retrieved1.len(), frames.len());
885
886 let result2 = deduplicator
888 .deduplicate_stack_trace(&frames)
889 .expect("Failed to deduplicate stack trace");
890 assert_eq!(result2.hash, result1.hash);
891 assert_eq!(result2.ref_count, 2);
892
893 let retrieved2 = deduplicator
895 .get_stack_trace(&result2)
896 .expect("Failed to get stack trace with result2");
897 assert_eq!(retrieved2.len(), frames.len());
898 }
899
900 #[test]
901 fn test_stack_trace_deduplication_disabled() {
902 let config = DeduplicationConfig {
903 enable_stack_dedup: false,
904 ..Default::default()
905 };
906 let deduplicator = ComprehensiveDataDeduplicator::new(config);
907
908 let frames = vec![
909 create_test_stack_frame("main", "main.rs", 10),
910 create_test_stack_frame("foo", "lib.rs", 20),
911 ];
912
913 let result1 = deduplicator
915 .deduplicate_stack_trace(&frames)
916 .expect("Failed to deduplicate stack trace");
917 assert_eq!(result1.frame_count, frames.len());
918 assert_eq!(result1.ref_count, 1);
919
920 let result2 = deduplicator
922 .deduplicate_stack_trace(&frames)
923 .expect("Failed to deduplicate stack trace");
924 assert_eq!(result2.hash, result1.hash);
925 assert_eq!(result2.ref_count, 1); }
927 #[test]
928 fn test_metadata_deduplication_enabled() {
929 let config = DeduplicationConfig {
930 enable_stats: false,
931 enable_compression: false,
932 ..Default::default()
933 }; let deduplicator = ComprehensiveDataDeduplicator::new(config);
935
936 let metadata = create_test_metadata();
937
938 let result1 = deduplicator
940 .deduplicate_metadata(&metadata)
941 .expect("Failed to deduplicate metadata");
942 assert_eq!(result1.entry_count, metadata.len());
943 assert_eq!(result1.ref_count, 1);
944
945 assert_eq!(deduplicator.metadata_storage.len(), 1);
947 assert_eq!(deduplicator.metadata_refs.len(), 1);
948
949 let retrieved1 = deduplicator
951 .get_metadata(&result1)
952 .expect("Failed to get metadata with result1");
953 assert_eq!(retrieved1.len(), metadata.len());
954
955 let result2 = deduplicator
957 .deduplicate_metadata(&metadata)
958 .expect("Failed to deduplicate metadata");
959 assert_eq!(result2.hash, result1.hash);
960 assert_eq!(result2.ref_count, 2);
961
962 let retrieved2 = deduplicator
964 .get_metadata(&result2)
965 .expect("Failed to get metadata with result2");
966 assert_eq!(retrieved2.len(), metadata.len());
967 }
968
969 #[test]
970 fn test_metadata_deduplication_disabled() {
971 let config = DeduplicationConfig {
972 enable_metadata_dedup: false,
973 ..Default::default()
974 };
975 let deduplicator = ComprehensiveDataDeduplicator::new(config);
976
977 let metadata = create_test_metadata();
978
979 let result1 = deduplicator
981 .deduplicate_metadata(&metadata)
982 .expect("Failed to deduplicate metadata");
983 assert_eq!(result1.entry_count, metadata.len());
984 assert_eq!(result1.ref_count, 1);
985
986 let result2 = deduplicator
988 .deduplicate_metadata(&metadata)
989 .expect("Failed to deduplicate metadata");
990 assert_eq!(result2.hash, result1.hash);
991 assert_eq!(result2.ref_count, 1); }
993
994 #[test]
995 fn test_metadata_compression() {
996 let config = DeduplicationConfig {
997 compression_threshold: 10,
998 ..Default::default()
999 }; let deduplicator = ComprehensiveDataDeduplicator::new(config);
1001
1002 let mut large_metadata = HashMap::new();
1003 for i in 0..100 {
1004 large_metadata.insert(format!("key_{i}"), format!("value_{i}"));
1005 }
1006
1007 let result = deduplicator
1008 .deduplicate_metadata(&large_metadata)
1009 .expect("Failed to deduplicate metadata");
1010 assert_eq!(result.entry_count, large_metadata.len());
1011
1012 let retrieved = deduplicator
1014 .get_metadata(&result)
1015 .expect("Failed to get compressed metadata");
1016 assert!(retrieved.is_empty()); }
1019
    #[test]
    fn test_get_stats() {
        // A fresh deduplicator reports all-zero statistics.
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let stats = deduplicator.get_stats().expect("Failed to get stats");
        assert_eq!(stats.strings_deduplicated, 0);
        assert_eq!(stats.stack_traces_deduplicated, 0);
        assert_eq!(stats.metadata_deduplicated, 0);
        assert_eq!(stats.cache_hit_rate, 0.0);
    }
1031
    #[test]
    fn test_clear_all() {
        // clear_all must empty every storage map.
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let test_string = "test";
        let frames = vec![create_test_stack_frame("main", "main.rs", 10)];
        let metadata = create_test_metadata();

        let _string_ref = deduplicator
            .deduplicate_string(test_string)
            .expect("Failed to deduplicate string");
        let _stack_ref = deduplicator
            .deduplicate_stack_trace(&frames)
            .expect("Failed to deduplicate stack trace");
        let _metadata_ref = deduplicator
            .deduplicate_metadata(&metadata)
            .expect("Failed to deduplicate metadata");

        // Payloads may land in plain or compressed storage.
        assert!(
            !deduplicator.string_storage.is_empty() || !deduplicator.compressed_storage.is_empty()
        );

        deduplicator.clear_all();

        assert!(deduplicator.string_storage.is_empty());
        assert!(deduplicator.string_refs.is_empty());
        assert!(deduplicator.stack_storage.is_empty());
        assert!(deduplicator.stack_refs.is_empty());
        assert!(deduplicator.metadata_storage.is_empty());
        assert!(deduplicator.metadata_refs.is_empty());
        assert!(deduplicator.compressed_storage.is_empty());
    }
1069
    #[test]
    fn test_hash_calculation_consistency() {
        // Hashing the same input twice must produce the same value.
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let test_string = "consistent_hash_test";
        let hash1 = deduplicator.calculate_string_hash(test_string);
        let hash2 = deduplicator.calculate_string_hash(test_string);
        assert_eq!(hash1, hash2);

        let frames = vec![create_test_stack_frame("main", "main.rs", 10)];
        let stack_hash1 = deduplicator.calculate_stack_hash(&frames);
        let stack_hash2 = deduplicator.calculate_stack_hash(&frames);
        assert_eq!(stack_hash1, stack_hash2);

        let metadata = create_test_metadata();
        let meta_hash1 = deduplicator.calculate_metadata_hash(&metadata);
        let meta_hash2 = deduplicator.calculate_metadata_hash(&metadata);
        assert_eq!(meta_hash1, meta_hash2);
    }
1090
1091 #[test]
1092 fn test_compression_decompression() {
1093 let config = DeduplicationConfig::default();
1094 let deduplicator = ComprehensiveDataDeduplicator::new(config);
1095
1096 let test_data = b"Hello, World! This is test data for compression.";
1097
1098 let compressed = deduplicator
1099 .compress_data(test_data)
1100 .expect("Failed to compress data");
1101 assert!(compressed.len() > test_data.len()); let decompressed = deduplicator
1104 .decompress_data(&compressed)
1105 .expect("Failed to decompress data");
1106 assert_eq!(decompressed, test_data);
1107 }
1108
    #[test]
    fn test_compression_invalid_format() {
        // Payloads without the expected marker must be rejected.
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let invalid_data = b"INVALID_FORMAT:data";
        let result = deduplicator.decompress_data(invalid_data);
        assert!(result.is_err());
    }
1118
    #[test]
    fn test_serialization_deserialization() {
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let frames = vec![
            create_test_stack_frame("main", "main.rs", 10),
            create_test_stack_frame("foo", "lib.rs", 20),
        ];

        let serialized = deduplicator
            .serialize_stack_frames(&frames)
            .expect("Failed to serialize frames");
        assert!(!serialized.is_empty());

        // NOTE(review): deserialization is a placeholder that discards the
        // payload — this assertion pins the current lossy behavior.
        let deserialized = deduplicator
            .deserialize_stack_frames(&serialized)
            .expect("Failed to deserialize frames");
        assert!(deserialized.is_empty());

        let metadata = create_test_metadata();
        let serialized_meta = deduplicator
            .serialize_metadata(&metadata)
            .expect("Failed to serialize metadata");
        assert!(!serialized_meta.is_empty());

        // NOTE(review): same placeholder behavior for metadata.
        let deserialized_meta = deduplicator
            .deserialize_metadata(&serialized_meta)
            .expect("Failed to deserialize metadata");
        assert!(deserialized_meta.is_empty());
    }
1152
    #[test]
    fn test_get_nonexistent_data() {
        // Lookups with unknown hashes must fail with an error, not panic.
        let config = DeduplicationConfig::default();
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let fake_string_ref = DeduplicatedString {
            hash: 12345,
            length: 10,
            ref_count: 1,
        };
        let result = deduplicator.get_string(&fake_string_ref);
        assert!(result.is_err());

        let fake_stack_ref = DeduplicatedStackTrace {
            hash: 67890,
            frame_count: 5,
            ref_count: 1,
        };
        let result = deduplicator.get_stack_trace(&fake_stack_ref);
        assert!(result.is_err());

        let fake_metadata_ref = DeduplicatedMetadata {
            hash: 11111,
            entry_count: 3,
            ref_count: 1,
        };
        let result = deduplicator.get_metadata(&fake_metadata_ref);
        assert!(result.is_err());
    }
1182
1183 #[test]
1184 fn test_deduplicated_structs_equality() {
1185 let string_ref1 = DeduplicatedString {
1186 hash: 123,
1187 length: 10,
1188 ref_count: 1,
1189 };
1190 let string_ref2 = DeduplicatedString {
1191 hash: 123,
1192 length: 10,
1193 ref_count: 2, };
1195 let string_ref3 = DeduplicatedString {
1196 hash: 123,
1197 length: 10,
1198 ref_count: 1,
1199 };
1200
1201 assert_ne!(string_ref1, string_ref2); assert_eq!(string_ref1, string_ref3); let stack_ref1 = DeduplicatedStackTrace {
1205 hash: 456,
1206 frame_count: 5,
1207 ref_count: 1,
1208 };
1209 let stack_ref2 = DeduplicatedStackTrace {
1210 hash: 456,
1211 frame_count: 5,
1212 ref_count: 1,
1213 };
1214 assert_eq!(stack_ref1, stack_ref2);
1215
1216 let meta_ref1 = DeduplicatedMetadata {
1217 hash: 789,
1218 entry_count: 3,
1219 ref_count: 1,
1220 };
1221 let meta_ref2 = DeduplicatedMetadata {
1222 hash: 789,
1223 entry_count: 3,
1224 ref_count: 1,
1225 };
1226 assert_eq!(meta_ref1, meta_ref2);
1227 }
1228
1229 #[test]
1230 fn test_global_deduplicator() {
1231 let config = DeduplicationConfig::default();
1233 let deduplicator = ComprehensiveDataDeduplicator::new(config);
1234
1235 let test_string = "global_test";
1237 let result = deduplicator
1238 .deduplicate_string(test_string)
1239 .expect("Failed to deduplicate string");
1240 assert_eq!(result.length, test_string.len());
1241
1242 let _global = get_global_data_deduplicator();
1244 }
1245
1246 #[test]
1247 fn test_initialize_global_deduplicator() {
1248 let custom_config = DeduplicationConfig {
1249 enable_string_dedup: false,
1250 enable_stack_dedup: true,
1251 enable_metadata_dedup: false,
1252 max_cache_size: 1000,
1253 enable_compression: false,
1254 compression_threshold: 2048,
1255 enable_stats: false,
1256 cleanup_threshold: 0.5,
1257 };
1258
1259 let deduplicator = ComprehensiveDataDeduplicator::new(custom_config);
1261
1262 assert!(!deduplicator.config.enable_string_dedup);
1264 assert!(deduplicator.config.enable_stack_dedup);
1265 assert!(!deduplicator.config.enable_metadata_dedup);
1266 assert_eq!(deduplicator.config.max_cache_size, 1000);
1267 }
1268
1269 #[test]
1270 fn test_stats_serialization() {
1271 let stats = DeduplicationStats {
1272 strings_deduplicated: 10,
1273 stack_traces_deduplicated: 5,
1274 metadata_deduplicated: 3,
1275 memory_saved_bytes: 1024,
1276 compression_ratio: 0.75,
1277 cache_hit_rate: 0.85,
1278 total_operations: 20,
1279 cleanup_operations: 2,
1280 };
1281
1282 let serialized = serde_json::to_string(&stats).expect("Failed to serialize stats");
1283 let deserialized: DeduplicationStats =
1284 serde_json::from_str(&serialized).expect("Failed to deserialize stats");
1285
1286 assert_eq!(deserialized.strings_deduplicated, 10);
1287 assert_eq!(deserialized.stack_traces_deduplicated, 5);
1288 assert_eq!(deserialized.metadata_deduplicated, 3);
1289 assert_eq!(deserialized.memory_saved_bytes, 1024);
1290 assert!((deserialized.compression_ratio - 0.75).abs() < f64::EPSILON);
1291 assert!((deserialized.cache_hit_rate - 0.85).abs() < f64::EPSILON);
1292 assert_eq!(deserialized.total_operations, 20);
1293 assert_eq!(deserialized.cleanup_operations, 2);
1294 }
1295
1296 #[test]
1297 fn test_deduplicated_refs_serialization() {
1298 let string_ref = DeduplicatedString {
1299 hash: 123456789,
1300 length: 42,
1301 ref_count: 5,
1302 };
1303
1304 let serialized =
1305 serde_json::to_string(&string_ref).expect("Failed to serialize string ref");
1306 let deserialized: DeduplicatedString =
1307 serde_json::from_str(&serialized).expect("Failed to deserialize string ref");
1308
1309 assert_eq!(deserialized.hash, 123456789);
1310 assert_eq!(deserialized.length, 42);
1311 assert_eq!(deserialized.ref_count, 5);
1312 }
1313
1314 #[test]
1315 fn test_string_deduplication_deadlock_fix() {
1316 let config = DeduplicationConfig {
1319 enable_string_dedup: true,
1320 enable_stats: true,
1321 ..Default::default()
1322 };
1323
1324 let deduplicator = ComprehensiveDataDeduplicator::new(config);
1325 let test_string = "deadlock_test_string";
1326
1327 let result1 = deduplicator
1329 .deduplicate_string(test_string)
1330 .expect("First call should succeed");
1331 assert_eq!(result1.ref_count, 1);
1332 assert_eq!(result1.length, test_string.len());
1333
1334 let result2 = deduplicator
1336 .deduplicate_string(test_string)
1337 .expect("Second call should succeed without deadlock");
1338 assert_eq!(result2.ref_count, 2);
1339 assert_eq!(result2.hash, result1.hash);
1340 assert_eq!(result2.length, result1.length);
1341
1342 let result3 = deduplicator
1344 .deduplicate_string(test_string)
1345 .expect("Third call should succeed");
1346 assert_eq!(result3.ref_count, 3);
1347 assert_eq!(result3.hash, result1.hash);
1348
1349 let stats = deduplicator.get_stats().expect("Should get stats");
1351 assert!(stats.strings_deduplicated >= 2); }
1353
1354 #[test]
1355 fn test_concurrent_string_deduplication_safety() {
1356 use std::sync::Arc;
1357 use std::thread;
1358
1359 let config = DeduplicationConfig {
1360 enable_string_dedup: true,
1361 enable_stats: true,
1362 ..Default::default()
1363 };
1364
1365 let deduplicator = Arc::new(ComprehensiveDataDeduplicator::new(config));
1366 let test_string = "concurrent_test_string";
1367 let num_threads = 4;
1368 let calls_per_thread = 10;
1369
1370 let mut handles = vec![];
1371
1372 for thread_id in 0..num_threads {
1374 let dedup_clone = Arc::clone(&deduplicator);
1375 let test_str = test_string.to_string();
1376
1377 let handle = thread::spawn(move || {
1378 let mut results = vec![];
1379 for i in 0..calls_per_thread {
1380 let result = dedup_clone
1381 .deduplicate_string(&test_str)
1382 .unwrap_or_else(|_| panic!("Thread {thread_id} call {i} should succeed"));
1383 results.push(result);
1384 }
1385 results
1386 });
1387 handles.push(handle);
1388 }
1389
1390 let mut all_results = vec![];
1392 for handle in handles {
1393 let thread_results = handle.join().expect("Thread should complete successfully");
1394 all_results.extend(thread_results);
1395 }
1396
1397 let first_hash = all_results[0].hash;
1399 for result in &all_results {
1400 assert_eq!(result.hash, first_hash);
1401 assert_eq!(result.length, test_string.len());
1402 assert!(result.ref_count > 0);
1403 }
1404
1405 let final_result = deduplicator
1407 .deduplicate_string(test_string)
1408 .expect("Final call should succeed");
1409 let expected_final_count = (num_threads * calls_per_thread) + 1;
1410 assert_eq!(final_result.ref_count, expected_final_count);
1411 }
1412}