memscope_rs/core/
comprehensive_data_deduplicator.rs

//! Comprehensive Data Deduplication System
//!
//! This module provides advanced data deduplication and normalization capabilities
//! to optimize memory usage and improve performance. Written toward requirement.md:
//! - Avoids `unwrap` and unnecessary clones on hot paths
//! - Uses `Arc` for shared ownership of stored payloads
//! - Routes all lock access through `safe_operations` (`safe_lock`)
//! - Uses `unwrap_safe`-style error handling (`TrackingResult`)
10use crate::analysis::unsafe_ffi_tracker::StackFrame;
11use crate::core::safe_operations::SafeLock;
12use crate::core::types::TrackingResult;
13use dashmap::DashMap;
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::hash::{Hash, Hasher};
17use std::sync::{Arc, Mutex};
18
/// Configuration for data deduplication
#[derive(Debug, Clone)]
pub struct DeduplicationConfig {
    /// Enable string deduplication
    pub enable_string_dedup: bool,
    /// Enable stack trace deduplication
    pub enable_stack_dedup: bool,
    /// Enable metadata deduplication
    pub enable_metadata_dedup: bool,
    /// Maximum cache size for each deduplication type (entries; used to
    /// pre-size the per-kind maps in `ComprehensiveDataDeduplicator::new`)
    pub max_cache_size: usize,
    /// Enable compression for large data
    pub enable_compression: bool,
    /// Minimum size for compression (bytes); payloads strictly larger than
    /// this are routed to the compressed store
    pub compression_threshold: usize,
    /// Enable statistics collection
    pub enable_stats: bool,
    /// Auto-cleanup threshold
    // NOTE(review): not consulted anywhere visible in this module — confirm
    // whether auto-cleanup is implemented elsewhere or still TODO.
    pub cleanup_threshold: f64,
}
39
40impl Default for DeduplicationConfig {
41    fn default() -> Self {
42        Self {
43            enable_string_dedup: true,
44            enable_stack_dedup: true,
45            enable_metadata_dedup: true,
46            max_cache_size: 50000,
47            enable_compression: true,
48            compression_threshold: 1024,
49            enable_stats: true,
50            cleanup_threshold: 0.8,
51        }
52    }
53}
54
/// Deduplication statistics
///
/// Counters are maintained by the `update_stats_*` helpers; `cache_hit_rate`
/// is recomputed from the counters inside `get_stats`.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct DeduplicationStats {
    // String cache hits (ref-count bumps on existing entries).
    pub strings_deduplicated: u64,
    // Stack trace cache hits.
    pub stack_traces_deduplicated: u64,
    // Metadata cache hits.
    pub metadata_deduplicated: u64,
    // Rough estimate of bytes saved by deduplication.
    pub memory_saved_bytes: u64,
    // NOTE(review): never written in this module — stays 0.0 unless set elsewhere.
    pub compression_ratio: f64,
    // Derived value, filled in by `get_stats` (hits / total_operations).
    pub cache_hit_rate: f64,
    // Operations counted via `update_stats_total_operations`.
    pub total_operations: u64,
    // Cleanup passes performed.
    pub cleanup_operations: u64,
}
67
/// Deduplicated string reference
///
/// A cheap `Copy` handle standing in for a stored string. `ref_count` is a
/// snapshot taken when this handle was produced; the live count lives in the
/// deduplicator's index and keeps growing with later hits.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DeduplicatedString {
    /// Hash of the original string (key into the storage maps)
    pub hash: u64,
    /// Length of the original string
    pub length: usize,
    /// Reference count at the time this handle was created
    pub ref_count: u64,
}
78
/// Deduplicated stack trace reference
///
/// `Copy` handle for a stored `Vec<StackFrame>`; `ref_count` is a snapshot
/// (see `DeduplicatedString` for the same convention).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DeduplicatedStackTrace {
    /// Hash of the stack trace (key into the storage maps)
    pub hash: u64,
    /// Number of frames
    pub frame_count: usize,
    /// Reference count at the time this handle was created
    pub ref_count: u64,
}
89
/// Deduplicated metadata reference
///
/// `Copy` handle for a stored `HashMap<String, String>`; `ref_count` is a
/// snapshot (see `DeduplicatedString` for the same convention).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct DeduplicatedMetadata {
    /// Hash of the metadata (key into the storage maps)
    pub hash: u64,
    /// Number of key-value pairs
    pub entry_count: usize,
    /// Reference count at the time this handle was created
    pub ref_count: u64,
}
100
/// Comprehensive data deduplicator
///
/// Each data kind (strings, stack traces, metadata) gets a payload store plus
/// a reference-count index, all keyed by a 64-bit content hash. Payloads over
/// the compression threshold of every kind share the single
/// `compressed_storage` map.
// NOTE(review): because all three kinds share `compressed_storage` keyed only
// by hash, a cross-kind hash collision would overwrite data — confirm whether
// the key spaces should be separated per kind.
pub struct ComprehensiveDataDeduplicator {
    /// String storage (hash -> `Arc<String>`)
    string_storage: DashMap<u64, Arc<String>>,
    /// String reference tracking
    string_refs: DashMap<u64, DeduplicatedString>,
    /// Stack trace storage (hash -> `Arc<Vec<StackFrame>>`)
    stack_storage: DashMap<u64, Arc<Vec<StackFrame>>>,
    /// Stack trace reference tracking
    stack_refs: DashMap<u64, DeduplicatedStackTrace>,
    /// Metadata storage (hash -> `Arc<HashMap<String, String>>`)
    metadata_storage: DashMap<u64, Arc<HashMap<String, String>>>,
    /// Metadata reference tracking
    metadata_refs: DashMap<u64, DeduplicatedMetadata>,
    /// Compressed data storage for large items (shared across all kinds)
    compressed_storage: DashMap<u64, Arc<Vec<u8>>>,
    /// Statistics (Mutex-guarded; always accessed through `safe_lock`)
    stats: Arc<Mutex<DeduplicationStats>>,
    /// Configuration
    config: DeduplicationConfig,
}
122
123impl ComprehensiveDataDeduplicator {
124    /// Create new comprehensive data deduplicator
125    pub fn new(config: DeduplicationConfig) -> Self {
126        tracing::info!("🔄 Initializing Comprehensive Data Deduplicator");
127        tracing::info!("   • String dedup: {}", config.enable_string_dedup);
128        tracing::info!("   • Stack dedup: {}", config.enable_stack_dedup);
129        tracing::info!("   • Metadata dedup: {}", config.enable_metadata_dedup);
130        tracing::info!("   • Compression: {}", config.enable_compression);
131
132        Self {
133            string_storage: DashMap::with_capacity(config.max_cache_size),
134            string_refs: DashMap::with_capacity(config.max_cache_size),
135            stack_storage: DashMap::with_capacity(config.max_cache_size),
136            stack_refs: DashMap::with_capacity(config.max_cache_size),
137            metadata_storage: DashMap::with_capacity(config.max_cache_size),
138            metadata_refs: DashMap::with_capacity(config.max_cache_size),
139            compressed_storage: DashMap::new(),
140            stats: Arc::new(Mutex::new(DeduplicationStats::default())),
141            config,
142        }
143    }
144
145    /// Deduplicate a string using atomic operations for thread safety
146    pub fn deduplicate_string(&self, input: &str) -> TrackingResult<DeduplicatedString> {
147        if !self.config.enable_string_dedup {
148            return Ok(DeduplicatedString {
149                hash: self.calculate_string_hash(input),
150                length: input.len(),
151                ref_count: 1,
152            });
153        }
154
155        let hash = self.calculate_string_hash(input);
156
157        // Use entry API for atomic check-and-update operation
158        match self.string_refs.entry(hash) {
159            dashmap::mapref::entry::Entry::Occupied(mut entry) => {
160                // String already exists, atomically increment reference count
161                let updated_ref = {
162                    let current = entry.get();
163                    DeduplicatedString {
164                        hash: current.hash,
165                        length: current.length,
166                        ref_count: current.ref_count + 1,
167                    }
168                };
169                entry.insert(updated_ref);
170
171                self.update_stats_string_dedup();
172                tracing::debug!("🔄 String deduplicated: hash={}", hash);
173                // 🔧 FIX: Return the updated_ref directly instead of calling entry.get() again
174                // This avoids the deadlock caused by trying to access the entry after insert
175                Ok(updated_ref)
176            }
177            dashmap::mapref::entry::Entry::Vacant(entry) => {
178                // String doesn't exist, create new entry
179                let dedup_ref = DeduplicatedString {
180                    hash,
181                    length: input.len(),
182                    ref_count: 1,
183                };
184
185                // Store the actual string data
186                if self.config.enable_compression && input.len() > self.config.compression_threshold
187                {
188                    let compressed = self.compress_data(input.as_bytes())?;
189                    self.compressed_storage.insert(hash, Arc::new(compressed));
190                } else {
191                    let arc_string = Arc::new(input.to_string());
192                    self.string_storage.insert(hash, arc_string);
193                }
194
195                // Insert the reference atomically
196                entry.insert(dedup_ref);
197                self.update_stats_total_operations();
198
199                tracing::debug!(
200                    "🔄 New string stored: hash={}, length={}",
201                    hash,
202                    input.len()
203                );
204                Ok(dedup_ref)
205            }
206        }
207    }
208
209    /// Retrieve deduplicated string
210    pub fn get_string(&self, dedup_ref: &DeduplicatedString) -> TrackingResult<Arc<String>> {
211        let hash = dedup_ref.hash;
212
213        // Try regular storage first
214        if let Some(string) = self.string_storage.get(&hash) {
215            return Ok(Arc::clone(string.value()));
216        }
217
218        // Try compressed storage
219        if let Some(compressed) = self.compressed_storage.get(&hash) {
220            let decompressed = self.decompress_data(&compressed)?;
221            let string = String::from_utf8(decompressed).map_err(|e| {
222                crate::core::types::TrackingError::DataError(format!(
223                    "Failed to decode decompressed string: {e}"
224                ))
225            })?;
226            return Ok(Arc::new(string));
227        }
228
229        Err(crate::core::types::TrackingError::DataError(format!(
230            "String with hash {hash} not found"
231        )))
232    }
233
234    /// Deduplicate a stack trace using atomic operations for thread safety
235    pub fn deduplicate_stack_trace(
236        &self,
237        frames: &[StackFrame],
238    ) -> TrackingResult<DeduplicatedStackTrace> {
239        if !self.config.enable_stack_dedup {
240            return Ok(DeduplicatedStackTrace {
241                hash: self.calculate_stack_hash(frames),
242                frame_count: frames.len(),
243                ref_count: 1,
244            });
245        }
246
247        let hash = self.calculate_stack_hash(frames);
248
249        // Use entry API for atomic check-and-update operation
250        match self.stack_refs.entry(hash) {
251            dashmap::mapref::entry::Entry::Occupied(mut entry) => {
252                // Stack trace already exists, atomically increment reference count
253                let updated_ref = {
254                    let current = entry.get();
255                    DeduplicatedStackTrace {
256                        hash: current.hash,
257                        frame_count: current.frame_count,
258                        ref_count: current.ref_count + 1,
259                    }
260                };
261                entry.insert(updated_ref);
262
263                self.update_stats_stack_dedup();
264                tracing::debug!("🔄 Stack trace deduplicated: hash={}", hash);
265                // 🔧 FIX: Return the updated_ref directly instead of calling entry.get() again
266                // This avoids the deadlock caused by trying to access the entry after insert
267                Ok(updated_ref)
268            }
269            dashmap::mapref::entry::Entry::Vacant(entry) => {
270                // Stack trace doesn't exist, create new entry
271                let dedup_ref = DeduplicatedStackTrace {
272                    hash,
273                    frame_count: frames.len(),
274                    ref_count: 1,
275                };
276
277                // Store the actual stack trace data
278                let serialized_size = std::mem::size_of_val(frames);
279                if self.config.enable_compression
280                    && serialized_size > self.config.compression_threshold
281                {
282                    let serialized = self.serialize_stack_frames(frames)?;
283                    let compressed = self.compress_data(&serialized)?;
284                    self.compressed_storage.insert(hash, Arc::new(compressed));
285                } else {
286                    let arc_frames = Arc::new(frames.to_vec());
287                    self.stack_storage.insert(hash, arc_frames);
288                }
289
290                // Insert the reference atomically
291                entry.insert(dedup_ref);
292                self.update_stats_total_operations();
293
294                tracing::debug!(
295                    "🔄 New stack trace stored: hash={}, frames={}",
296                    hash,
297                    frames.len()
298                );
299                Ok(dedup_ref)
300            }
301        }
302    }
303
304    /// Retrieve deduplicated stack trace
305    pub fn get_stack_trace(
306        &self,
307        dedup_ref: &DeduplicatedStackTrace,
308    ) -> TrackingResult<Arc<Vec<StackFrame>>> {
309        let hash = dedup_ref.hash;
310
311        // Try regular storage first
312        if let Some(frames) = self.stack_storage.get(&hash) {
313            return Ok(Arc::clone(frames.value()));
314        }
315
316        // Try compressed storage
317        if let Some(compressed) = self.compressed_storage.get(&hash) {
318            let decompressed = self.decompress_data(&compressed)?;
319            let frames = self.deserialize_stack_frames(&decompressed)?;
320            return Ok(Arc::new(frames));
321        }
322
323        Err(crate::core::types::TrackingError::DataError(format!(
324            "Stack trace with hash {hash} not found"
325        )))
326    }
327
328    /// Deduplicate metadata using atomic operations for thread safety
329    pub fn deduplicate_metadata(
330        &self,
331        metadata: &HashMap<String, String>,
332    ) -> TrackingResult<DeduplicatedMetadata> {
333        if !self.config.enable_metadata_dedup {
334            return Ok(DeduplicatedMetadata {
335                hash: self.calculate_metadata_hash(metadata),
336                entry_count: metadata.len(),
337                ref_count: 1,
338            });
339        }
340
341        let hash = self.calculate_metadata_hash(metadata);
342
343        // Use entry API for atomic check-and-update operation
344        match self.metadata_refs.entry(hash) {
345            dashmap::mapref::entry::Entry::Occupied(mut entry) => {
346                // Metadata already exists, atomically increment reference count
347                let updated_ref = {
348                    let current = entry.get();
349                    DeduplicatedMetadata {
350                        hash: current.hash,
351                        entry_count: current.entry_count,
352                        ref_count: current.ref_count + 1,
353                    }
354                };
355                entry.insert(updated_ref);
356
357                self.update_stats_metadata_dedup();
358                tracing::debug!("🔄 Metadata deduplicated: hash={}", hash);
359                // 🔧 FIX: Return the updated_ref directly instead of calling entry.get() again
360                // This avoids the deadlock caused by trying to access the entry after insert
361                Ok(updated_ref)
362            }
363            dashmap::mapref::entry::Entry::Vacant(entry) => {
364                // Metadata doesn't exist, create new entry
365                let dedup_ref = DeduplicatedMetadata {
366                    hash,
367                    entry_count: metadata.len(),
368                    ref_count: 1,
369                };
370
371                // Store the actual metadata
372                let serialized_size = metadata
373                    .iter()
374                    .map(|(k, v)| k.len() + v.len())
375                    .sum::<usize>();
376
377                if self.config.enable_compression
378                    && serialized_size > self.config.compression_threshold
379                {
380                    let serialized = self.serialize_metadata(metadata)?;
381                    let compressed = self.compress_data(&serialized)?;
382                    self.compressed_storage.insert(hash, Arc::new(compressed));
383                } else {
384                    let arc_metadata = Arc::new(metadata.to_owned());
385                    self.metadata_storage.insert(hash, arc_metadata);
386                }
387
388                // Insert the reference atomically
389                entry.insert(dedup_ref);
390                self.update_stats_total_operations();
391
392                tracing::debug!(
393                    "🔄 New metadata stored: hash={}, entries={}",
394                    hash,
395                    metadata.len()
396                );
397                Ok(dedup_ref)
398            }
399        }
400    }
401
402    /// Retrieve deduplicated metadata
403    pub fn get_metadata(
404        &self,
405        dedup_ref: &DeduplicatedMetadata,
406    ) -> TrackingResult<Arc<HashMap<String, String>>> {
407        let hash = dedup_ref.hash;
408
409        // Try regular storage first
410        if let Some(metadata) = self.metadata_storage.get(&hash) {
411            return Ok(Arc::clone(metadata.value()));
412        }
413
414        // Try compressed storage
415        if let Some(compressed) = self.compressed_storage.get(&hash) {
416            let decompressed = self.decompress_data(&compressed)?;
417            let metadata = self.deserialize_metadata(&decompressed)?;
418            return Ok(Arc::new(metadata));
419        }
420
421        Err(crate::core::types::TrackingError::DataError(format!(
422            "Metadata with hash {hash} not found"
423        )))
424    }
425
426    /// Get deduplication statistics
427    pub fn get_stats(&self) -> TrackingResult<DeduplicationStats> {
428        match self.stats.safe_lock() {
429            Ok(stats) => {
430                let mut result = stats.clone();
431
432                // Calculate cache hit rate
433                if result.total_operations > 0 {
434                    let total_dedups = result.strings_deduplicated
435                        + result.stack_traces_deduplicated
436                        + result.metadata_deduplicated;
437                    result.cache_hit_rate = total_dedups as f64 / result.total_operations as f64;
438                }
439
440                Ok(result)
441            }
442            Err(e) => {
443                tracing::warn!("Failed to get deduplication stats: {}", e);
444                Ok(DeduplicationStats::default())
445            }
446        }
447    }
448
449    /// Clear all deduplicated data
450    pub fn clear_all(&self) {
451        self.string_storage.clear();
452        self.string_refs.clear();
453        self.stack_storage.clear();
454        self.stack_refs.clear();
455        self.metadata_storage.clear();
456        self.metadata_refs.clear();
457        self.compressed_storage.clear();
458
459        match self.stats.safe_lock() {
460            Ok(mut stats) => {
461                *stats = DeduplicationStats::default();
462            }
463            Err(e) => {
464                tracing::warn!("Failed to reset stats during clear: {}", e);
465            }
466        }
467
468        tracing::info!("🔄 Cleared all deduplicated data");
469    }
470
471    /// Calculate hash for string
472    fn calculate_string_hash(&self, input: &str) -> u64 {
473        use std::collections::hash_map::DefaultHasher;
474        let mut hasher = DefaultHasher::new();
475        input.hash(&mut hasher);
476        hasher.finish()
477    }
478
479    /// Calculate hash for stack trace
480    fn calculate_stack_hash(&self, frames: &[StackFrame]) -> u64 {
481        use std::collections::hash_map::DefaultHasher;
482        let mut hasher = DefaultHasher::new();
483        for frame in frames {
484            frame.function_name.hash(&mut hasher);
485            frame.file_name.hash(&mut hasher);
486            frame.line_number.hash(&mut hasher);
487        }
488        hasher.finish()
489    }
490
491    /// Calculate hash for metadata
492    fn calculate_metadata_hash(&self, metadata: &HashMap<String, String>) -> u64 {
493        use std::collections::hash_map::DefaultHasher;
494        let mut hasher = DefaultHasher::new();
495
496        // Sort keys for consistent hashing
497        let mut sorted_pairs: Vec<_> = metadata.iter().collect();
498        sorted_pairs.sort_by_key(|(k, _)| *k);
499
500        for (key, value) in sorted_pairs {
501            key.hash(&mut hasher);
502            value.hash(&mut hasher);
503        }
504        hasher.finish()
505    }
506
507    /// Compress data using a simple compression algorithm
508    fn compress_data(&self, data: &[u8]) -> TrackingResult<Vec<u8>> {
509        // Simulate compression (in real implementation, use zlib, lz4, etc.)
510        let mut compressed = Vec::with_capacity(data.len() / 2);
511        compressed.extend_from_slice(b"COMPRESSED:");
512        compressed.extend_from_slice(data);
513        Ok(compressed)
514    }
515
516    /// Decompress data
517    fn decompress_data(&self, compressed: &[u8]) -> TrackingResult<Vec<u8>> {
518        // Simulate decompression
519        if compressed.starts_with(b"COMPRESSED:") {
520            Ok(compressed[11..].to_vec())
521        } else {
522            Err(crate::core::types::TrackingError::DataError(
523                "Invalid compressed data format".to_string(),
524            ))
525        }
526    }
527
    /// Serialize stack frames (placeholder implementation).
    ///
    /// Uses the `Debug` formatting of the slice, which is NOT round-trippable:
    /// `deserialize_stack_frames` cannot reconstruct frames from this output.
    /// Replace with bincode/serde for real persistence.
    fn serialize_stack_frames(&self, frames: &[StackFrame]) -> TrackingResult<Vec<u8>> {
        // Simulate serialization (in real implementation, use bincode, serde_json, etc.)
        let serialized = format!("{frames:?}");
        Ok(serialized.into_bytes())
    }
534
    /// Deserialize stack frames (placeholder implementation).
    ///
    /// WARNING: always returns an empty vector, so stack traces that were
    /// routed to the compressed store come back with zero frames — compressed
    /// stack data is effectively lost. Only UTF-8 validity of the payload is
    /// checked here.
    fn deserialize_stack_frames(&self, data: &[u8]) -> TrackingResult<Vec<StackFrame>> {
        // Simulate deserialization
        let _serialized = String::from_utf8(data.to_vec()).map_err(|e| {
            crate::core::types::TrackingError::DataError(format!(
                "Failed to decode serialized stack frames: {e}"
            ))
        })?;

        // Return empty frames for simulation
        Ok(Vec::new())
    }
547
    /// Serialize metadata (placeholder implementation).
    ///
    /// Uses `Debug` formatting, which is NOT round-trippable; see
    /// `deserialize_metadata` for the consequence.
    fn serialize_metadata(&self, metadata: &HashMap<String, String>) -> TrackingResult<Vec<u8>> {
        // Simulate serialization
        let serialized = format!("{metadata:?}");
        Ok(serialized.into_bytes())
    }
554
    /// Deserialize metadata (placeholder implementation).
    ///
    /// WARNING: always returns an empty map, so metadata routed to the
    /// compressed store comes back empty — compressed metadata is effectively
    /// lost. Only UTF-8 validity of the payload is checked here.
    fn deserialize_metadata(&self, data: &[u8]) -> TrackingResult<HashMap<String, String>> {
        // Simulate deserialization
        let _serialized = String::from_utf8(data.to_vec()).map_err(|e| {
            crate::core::types::TrackingError::DataError(format!(
                "Failed to decode serialized metadata: {e}"
            ))
        })?;

        // Return empty metadata for simulation
        Ok(HashMap::new())
    }
567
568    // Statistics update methods
569    fn update_stats_string_dedup(&self) {
570        if !self.config.enable_stats {
571            return;
572        }
573
574        match self.stats.safe_lock() {
575            Ok(mut stats) => {
576                stats.strings_deduplicated += 1;
577                stats.memory_saved_bytes += std::mem::size_of::<String>() as u64;
578            }
579            Err(e) => {
580                tracing::warn!("Failed to update string dedup stats: {}", e);
581            }
582        }
583    }
584
585    fn update_stats_stack_dedup(&self) {
586        if !self.config.enable_stats {
587            return;
588        }
589
590        match self.stats.safe_lock() {
591            Ok(mut stats) => {
592                stats.stack_traces_deduplicated += 1;
593                stats.memory_saved_bytes += std::mem::size_of::<Vec<StackFrame>>() as u64;
594            }
595            Err(e) => {
596                tracing::warn!("Failed to update stack dedup stats: {}", e);
597            }
598        }
599    }
600
601    fn update_stats_metadata_dedup(&self) {
602        if !self.config.enable_stats {
603            return;
604        }
605
606        match self.stats.safe_lock() {
607            Ok(mut stats) => {
608                stats.metadata_deduplicated += 1;
609                stats.memory_saved_bytes += std::mem::size_of::<HashMap<String, String>>() as u64;
610            }
611            Err(e) => {
612                tracing::warn!("Failed to update metadata dedup stats: {}", e);
613            }
614        }
615    }
616
617    fn update_stats_total_operations(&self) {
618        if !self.config.enable_stats {
619            return;
620        }
621
622        match self.stats.safe_lock() {
623            Ok(mut stats) => {
624                stats.total_operations += 1;
625            }
626            Err(e) => {
627                tracing::warn!("Failed to update total operations stats: {}", e);
628            }
629        }
630    }
631}
632
// ---- Global singleton access ----
634
/// Global comprehensive data deduplicator instance (set at most once;
/// lazily filled by `get_global_data_deduplicator` or explicitly by
/// `initialize_global_data_deduplicator`)
static GLOBAL_DATA_DEDUPLICATOR: std::sync::OnceLock<Arc<ComprehensiveDataDeduplicator>> =
    std::sync::OnceLock::new();
638
639/// Get global comprehensive data deduplicator instance
640pub fn get_global_data_deduplicator() -> Arc<ComprehensiveDataDeduplicator> {
641    GLOBAL_DATA_DEDUPLICATOR
642        .get_or_init(|| {
643            Arc::new(ComprehensiveDataDeduplicator::new(
644                DeduplicationConfig::default(),
645            ))
646        })
647        .clone()
648}
649
650/// Initialize global comprehensive data deduplicator with custom config
651pub fn initialize_global_data_deduplicator(
652    config: DeduplicationConfig,
653) -> Arc<ComprehensiveDataDeduplicator> {
654    let deduplicator = Arc::new(ComprehensiveDataDeduplicator::new(config));
655    match GLOBAL_DATA_DEDUPLICATOR.set(deduplicator.clone()) {
656        Ok(_) => tracing::info!("🔄 Global comprehensive data deduplicator initialized"),
657        Err(_) => tracing::warn!("🔄 Global comprehensive data deduplicator already initialized"),
658    }
659    deduplicator
660}
661
662#[cfg(test)]
663mod tests {
664    use super::*;
665    use std::collections::HashMap;
666
    /// Build a minimal `StackFrame` fixture (always a safe frame, with both
    /// file name and line number populated).
    fn create_test_stack_frame(function: &str, file: &str, line: u32) -> StackFrame {
        StackFrame {
            function_name: function.to_string(),
            file_name: Some(file.to_string()),
            line_number: Some(line),
            is_unsafe: false,
        }
    }
676
677    /// Create test metadata
678    fn create_test_metadata() -> HashMap<String, String> {
679        let mut metadata = HashMap::new();
680        metadata.insert("key1".to_string(), "value1".to_string());
681        metadata.insert("key2".to_string(), "value2".to_string());
682        metadata.insert("key3".to_string(), "value3".to_string());
683        metadata
684    }
685
686    #[test]
687    fn test_deduplication_config_default() {
688        let config = DeduplicationConfig::default();
689
690        assert!(config.enable_string_dedup);
691        assert!(config.enable_stack_dedup);
692        assert!(config.enable_metadata_dedup);
693        assert_eq!(config.max_cache_size, 50000);
694        assert!(config.enable_compression);
695        assert_eq!(config.compression_threshold, 1024);
696        assert!(config.enable_stats);
697        assert_eq!(config.cleanup_threshold, 0.8);
698    }
699
700    #[test]
701    fn test_deduplication_config_custom() {
702        let config = DeduplicationConfig {
703            enable_string_dedup: false,
704            enable_stack_dedup: true,
705            enable_metadata_dedup: false,
706            max_cache_size: 1000,
707            enable_compression: false,
708            compression_threshold: 2048,
709            enable_stats: false,
710            cleanup_threshold: 0.5,
711        };
712
713        assert!(!config.enable_string_dedup);
714        assert!(config.enable_stack_dedup);
715        assert!(!config.enable_metadata_dedup);
716        assert_eq!(config.max_cache_size, 1000);
717        assert!(!config.enable_compression);
718        assert_eq!(config.compression_threshold, 2048);
719        assert!(!config.enable_stats);
720        assert_eq!(config.cleanup_threshold, 0.5);
721    }
722
723    #[test]
724    fn test_deduplication_stats_default() {
725        let stats = DeduplicationStats::default();
726
727        assert_eq!(stats.strings_deduplicated, 0);
728        assert_eq!(stats.stack_traces_deduplicated, 0);
729        assert_eq!(stats.metadata_deduplicated, 0);
730        assert_eq!(stats.memory_saved_bytes, 0);
731        assert_eq!(stats.compression_ratio, 0.0);
732        assert_eq!(stats.cache_hit_rate, 0.0);
733        assert_eq!(stats.total_operations, 0);
734        assert_eq!(stats.cleanup_operations, 0);
735    }
736
737    #[test]
738    fn test_comprehensive_data_deduplicator_new() {
739        let config = DeduplicationConfig::default();
740        let deduplicator = ComprehensiveDataDeduplicator::new(config);
741
742        // Test that storages are initialized
743        assert_eq!(deduplicator.string_storage.len(), 0);
744        assert_eq!(deduplicator.string_refs.len(), 0);
745        assert_eq!(deduplicator.stack_storage.len(), 0);
746        assert_eq!(deduplicator.stack_refs.len(), 0);
747        assert_eq!(deduplicator.metadata_storage.len(), 0);
748        assert_eq!(deduplicator.metadata_refs.len(), 0);
749        assert_eq!(deduplicator.compressed_storage.len(), 0);
750    }
    /// End-to-end string dedup flow: first insert, repeated hits bumping the
    /// ref count, retrieval via any handle, and a deadlock regression loop
    /// covering the Occupied-entry path.
    #[test]
    fn test_string_deduplication_enabled() {
        let config = DeduplicationConfig {
            enable_stats: false,
            enable_compression: false,
            ..Default::default()
        }; // Disable compression to simplify
        let deduplicator = ComprehensiveDataDeduplicator::new(config);

        let test_string = "Hello, World!";

        // First deduplication - should create new entry
        let result1 = deduplicator
            .deduplicate_string(test_string)
            .expect("Failed to deduplicate string");
        assert_eq!(result1.length, test_string.len());
        assert_eq!(result1.ref_count, 1);

        // Verify storage state after first call
        assert_eq!(deduplicator.string_storage.len(), 1);
        assert_eq!(deduplicator.string_refs.len(), 1);

        // Test retrieval with first result
        let retrieved1 = deduplicator
            .get_string(&result1)
            .expect("Failed to get string with result1");
        assert_eq!(*retrieved1, test_string);

        // Second deduplication of same string - should increment ref count
        // 🔧 This is the critical test that used to deadlock before the fix
        let result2 = deduplicator
            .deduplicate_string(test_string)
            .expect("Failed to deduplicate string");
        assert_eq!(result2.hash, result1.hash);
        assert_eq!(result2.ref_count, 2);

        // Verify string can be retrieved using either reference
        let retrieved2 = deduplicator
            .get_string(&result2)
            .expect("Failed to get string with result2");
        assert_eq!(*retrieved2, test_string);

        // 🔧 Additional stress test: Multiple consecutive calls to ensure no deadlock
        for i in 3..=10 {
            let result = deduplicator
                .deduplicate_string(test_string)
                .unwrap_or_else(|_| panic!("Call {i} should succeed without deadlock"));
            assert_eq!(result.hash, result1.hash);
            assert_eq!(result.ref_count, i);
            assert_eq!(result.length, test_string.len());
        }

        // Verify final state: still exactly one stored payload and one index entry
        assert_eq!(deduplicator.string_storage.len(), 1);
        assert_eq!(deduplicator.string_refs.len(), 1);
    }
807
808    #[test]
809    fn test_string_deduplication_disabled() {
810        let config = DeduplicationConfig {
811            enable_string_dedup: false,
812            ..Default::default()
813        };
814        let deduplicator = ComprehensiveDataDeduplicator::new(config);
815
816        let test_string = "Hello, World!";
817
818        // First deduplication
819        let result1 = deduplicator
820            .deduplicate_string(test_string)
821            .expect("Failed to deduplicate string");
822        assert_eq!(result1.length, test_string.len());
823        assert_eq!(result1.ref_count, 1);
824
825        // Second deduplication should not increment ref count
826        let result2 = deduplicator
827            .deduplicate_string(test_string)
828            .expect("Failed to deduplicate string");
829        assert_eq!(result2.hash, result1.hash);
830        assert_eq!(result2.ref_count, 1); // Should remain 1 when disabled
831    }
832
833    #[test]
834    fn test_string_compression() {
835        let config = DeduplicationConfig {
836            compression_threshold: 10,
837            ..Default::default()
838        }; // Low threshold to trigger compression
839        let deduplicator = ComprehensiveDataDeduplicator::new(config);
840
841        let large_string = "This is a large string that should be compressed".repeat(10);
842
843        let result = deduplicator
844            .deduplicate_string(&large_string)
845            .expect("Failed to deduplicate string");
846        assert_eq!(result.length, large_string.len());
847
848        // Verify string can be retrieved from compressed storage
849        let retrieved = deduplicator
850            .get_string(&result)
851            .expect("Failed to get compressed string");
852        assert_eq!(*retrieved, large_string);
853    }
854    #[test]
855    fn test_stack_trace_deduplication_enabled() {
856        let config = DeduplicationConfig {
857            enable_stats: false,
858            enable_compression: false,
859            ..Default::default()
860        }; // Disable compression to simplify
861        let deduplicator = ComprehensiveDataDeduplicator::new(config);
862
863        let frames = vec![
864            create_test_stack_frame("main", "main.rs", 10),
865            create_test_stack_frame("foo", "lib.rs", 20),
866            create_test_stack_frame("bar", "lib.rs", 30),
867        ];
868
869        // First deduplication - should create new entry
870        let result1 = deduplicator
871            .deduplicate_stack_trace(&frames)
872            .expect("Failed to deduplicate stack trace");
873        assert_eq!(result1.frame_count, frames.len());
874        assert_eq!(result1.ref_count, 1);
875
876        // Verify storage state after first call
877        assert_eq!(deduplicator.stack_storage.len(), 1);
878        assert_eq!(deduplicator.stack_refs.len(), 1);
879
880        // Test retrieval with first result
881        let retrieved1 = deduplicator
882            .get_stack_trace(&result1)
883            .expect("Failed to get stack trace with result1");
884        assert_eq!(retrieved1.len(), frames.len());
885
886        // Second deduplication of same stack trace - should increment ref count
887        let result2 = deduplicator
888            .deduplicate_stack_trace(&frames)
889            .expect("Failed to deduplicate stack trace");
890        assert_eq!(result2.hash, result1.hash);
891        assert_eq!(result2.ref_count, 2);
892
893        // Verify stack trace can be retrieved using either reference
894        let retrieved2 = deduplicator
895            .get_stack_trace(&result2)
896            .expect("Failed to get stack trace with result2");
897        assert_eq!(retrieved2.len(), frames.len());
898    }
899
900    #[test]
901    fn test_stack_trace_deduplication_disabled() {
902        let config = DeduplicationConfig {
903            enable_stack_dedup: false,
904            ..Default::default()
905        };
906        let deduplicator = ComprehensiveDataDeduplicator::new(config);
907
908        let frames = vec![
909            create_test_stack_frame("main", "main.rs", 10),
910            create_test_stack_frame("foo", "lib.rs", 20),
911        ];
912
913        // First deduplication
914        let result1 = deduplicator
915            .deduplicate_stack_trace(&frames)
916            .expect("Failed to deduplicate stack trace");
917        assert_eq!(result1.frame_count, frames.len());
918        assert_eq!(result1.ref_count, 1);
919
920        // Second deduplication should not increment ref count
921        let result2 = deduplicator
922            .deduplicate_stack_trace(&frames)
923            .expect("Failed to deduplicate stack trace");
924        assert_eq!(result2.hash, result1.hash);
925        assert_eq!(result2.ref_count, 1); // Should remain 1 when disabled
926    }
927    #[test]
928    fn test_metadata_deduplication_enabled() {
929        let config = DeduplicationConfig {
930            enable_stats: false,
931            enable_compression: false,
932            ..Default::default()
933        }; // Disable compression to simplify
934        let deduplicator = ComprehensiveDataDeduplicator::new(config);
935
936        let metadata = create_test_metadata();
937
938        // First deduplication - should create new entry
939        let result1 = deduplicator
940            .deduplicate_metadata(&metadata)
941            .expect("Failed to deduplicate metadata");
942        assert_eq!(result1.entry_count, metadata.len());
943        assert_eq!(result1.ref_count, 1);
944
945        // Verify storage state after first call
946        assert_eq!(deduplicator.metadata_storage.len(), 1);
947        assert_eq!(deduplicator.metadata_refs.len(), 1);
948
949        // Test retrieval with first result
950        let retrieved1 = deduplicator
951            .get_metadata(&result1)
952            .expect("Failed to get metadata with result1");
953        assert_eq!(retrieved1.len(), metadata.len());
954
955        // Second deduplication of same metadata - should increment ref count
956        let result2 = deduplicator
957            .deduplicate_metadata(&metadata)
958            .expect("Failed to deduplicate metadata");
959        assert_eq!(result2.hash, result1.hash);
960        assert_eq!(result2.ref_count, 2);
961
962        // Verify metadata can be retrieved using either reference
963        let retrieved2 = deduplicator
964            .get_metadata(&result2)
965            .expect("Failed to get metadata with result2");
966        assert_eq!(retrieved2.len(), metadata.len());
967    }
968
969    #[test]
970    fn test_metadata_deduplication_disabled() {
971        let config = DeduplicationConfig {
972            enable_metadata_dedup: false,
973            ..Default::default()
974        };
975        let deduplicator = ComprehensiveDataDeduplicator::new(config);
976
977        let metadata = create_test_metadata();
978
979        // First deduplication
980        let result1 = deduplicator
981            .deduplicate_metadata(&metadata)
982            .expect("Failed to deduplicate metadata");
983        assert_eq!(result1.entry_count, metadata.len());
984        assert_eq!(result1.ref_count, 1);
985
986        // Second deduplication should not increment ref count
987        let result2 = deduplicator
988            .deduplicate_metadata(&metadata)
989            .expect("Failed to deduplicate metadata");
990        assert_eq!(result2.hash, result1.hash);
991        assert_eq!(result2.ref_count, 1); // Should remain 1 when disabled
992    }
993
994    #[test]
995    fn test_metadata_compression() {
996        let config = DeduplicationConfig {
997            compression_threshold: 10,
998            ..Default::default()
999        }; // Low threshold to trigger compression
1000        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1001
1002        let mut large_metadata = HashMap::new();
1003        for i in 0..100 {
1004            large_metadata.insert(format!("key_{i}"), format!("value_{i}"));
1005        }
1006
1007        let result = deduplicator
1008            .deduplicate_metadata(&large_metadata)
1009            .expect("Failed to deduplicate metadata");
1010        assert_eq!(result.entry_count, large_metadata.len());
1011
1012        // Verify metadata can be retrieved from compressed storage
1013        let retrieved = deduplicator
1014            .get_metadata(&result)
1015            .expect("Failed to get compressed metadata");
1016        // Note: Due to simulation, retrieved will be empty, but this tests the flow
1017        assert!(retrieved.is_empty()); // Expected due to simulation
1018    }
1019
1020    #[test]
1021    fn test_get_stats() {
1022        let config = DeduplicationConfig::default();
1023        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1024
1025        let stats = deduplicator.get_stats().expect("Failed to get stats");
1026        assert_eq!(stats.strings_deduplicated, 0);
1027        assert_eq!(stats.stack_traces_deduplicated, 0);
1028        assert_eq!(stats.metadata_deduplicated, 0);
1029        assert_eq!(stats.cache_hit_rate, 0.0);
1030    }
1031
1032    #[test]
1033    fn test_clear_all() {
1034        let config = DeduplicationConfig::default();
1035        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1036
1037        // Add some data
1038        let test_string = "test";
1039        let frames = vec![create_test_stack_frame("main", "main.rs", 10)];
1040        let metadata = create_test_metadata();
1041
1042        let _string_ref = deduplicator
1043            .deduplicate_string(test_string)
1044            .expect("Failed to deduplicate string");
1045        let _stack_ref = deduplicator
1046            .deduplicate_stack_trace(&frames)
1047            .expect("Failed to deduplicate stack trace");
1048        let _metadata_ref = deduplicator
1049            .deduplicate_metadata(&metadata)
1050            .expect("Failed to deduplicate metadata");
1051
1052        // Verify data exists
1053        assert!(
1054            !deduplicator.string_storage.is_empty() || !deduplicator.compressed_storage.is_empty()
1055        );
1056
1057        // Clear all
1058        deduplicator.clear_all();
1059
1060        // Verify all storages are empty
1061        assert!(deduplicator.string_storage.is_empty());
1062        assert!(deduplicator.string_refs.is_empty());
1063        assert!(deduplicator.stack_storage.is_empty());
1064        assert!(deduplicator.stack_refs.is_empty());
1065        assert!(deduplicator.metadata_storage.is_empty());
1066        assert!(deduplicator.metadata_refs.is_empty());
1067        assert!(deduplicator.compressed_storage.is_empty());
1068    }
1069
1070    #[test]
1071    fn test_hash_calculation_consistency() {
1072        let config = DeduplicationConfig::default();
1073        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1074
1075        let test_string = "consistent_hash_test";
1076        let hash1 = deduplicator.calculate_string_hash(test_string);
1077        let hash2 = deduplicator.calculate_string_hash(test_string);
1078        assert_eq!(hash1, hash2);
1079
1080        let frames = vec![create_test_stack_frame("main", "main.rs", 10)];
1081        let stack_hash1 = deduplicator.calculate_stack_hash(&frames);
1082        let stack_hash2 = deduplicator.calculate_stack_hash(&frames);
1083        assert_eq!(stack_hash1, stack_hash2);
1084
1085        let metadata = create_test_metadata();
1086        let meta_hash1 = deduplicator.calculate_metadata_hash(&metadata);
1087        let meta_hash2 = deduplicator.calculate_metadata_hash(&metadata);
1088        assert_eq!(meta_hash1, meta_hash2);
1089    }
1090
1091    #[test]
1092    fn test_compression_decompression() {
1093        let config = DeduplicationConfig::default();
1094        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1095
1096        let test_data = b"Hello, World! This is test data for compression.";
1097
1098        let compressed = deduplicator
1099            .compress_data(test_data)
1100            .expect("Failed to compress data");
1101        assert!(compressed.len() > test_data.len()); // Due to prefix in simulation
1102
1103        let decompressed = deduplicator
1104            .decompress_data(&compressed)
1105            .expect("Failed to decompress data");
1106        assert_eq!(decompressed, test_data);
1107    }
1108
1109    #[test]
1110    fn test_compression_invalid_format() {
1111        let config = DeduplicationConfig::default();
1112        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1113
1114        let invalid_data = b"INVALID_FORMAT:data";
1115        let result = deduplicator.decompress_data(invalid_data);
1116        assert!(result.is_err());
1117    }
1118
1119    #[test]
1120    fn test_serialization_deserialization() {
1121        let config = DeduplicationConfig::default();
1122        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1123
1124        let frames = vec![
1125            create_test_stack_frame("main", "main.rs", 10),
1126            create_test_stack_frame("foo", "lib.rs", 20),
1127        ];
1128
1129        let serialized = deduplicator
1130            .serialize_stack_frames(&frames)
1131            .expect("Failed to serialize frames");
1132        assert!(!serialized.is_empty());
1133
1134        let deserialized = deduplicator
1135            .deserialize_stack_frames(&serialized)
1136            .expect("Failed to deserialize frames");
1137        // Note: Due to simulation, deserialized will be empty
1138        assert!(deserialized.is_empty());
1139
1140        let metadata = create_test_metadata();
1141        let serialized_meta = deduplicator
1142            .serialize_metadata(&metadata)
1143            .expect("Failed to serialize metadata");
1144        assert!(!serialized_meta.is_empty());
1145
1146        let deserialized_meta = deduplicator
1147            .deserialize_metadata(&serialized_meta)
1148            .expect("Failed to deserialize metadata");
1149        // Note: Due to simulation, deserialized will be empty
1150        assert!(deserialized_meta.is_empty());
1151    }
1152
1153    #[test]
1154    fn test_get_nonexistent_data() {
1155        let config = DeduplicationConfig::default();
1156        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1157
1158        let fake_string_ref = DeduplicatedString {
1159            hash: 12345,
1160            length: 10,
1161            ref_count: 1,
1162        };
1163        let result = deduplicator.get_string(&fake_string_ref);
1164        assert!(result.is_err());
1165
1166        let fake_stack_ref = DeduplicatedStackTrace {
1167            hash: 67890,
1168            frame_count: 5,
1169            ref_count: 1,
1170        };
1171        let result = deduplicator.get_stack_trace(&fake_stack_ref);
1172        assert!(result.is_err());
1173
1174        let fake_metadata_ref = DeduplicatedMetadata {
1175            hash: 11111,
1176            entry_count: 3,
1177            ref_count: 1,
1178        };
1179        let result = deduplicator.get_metadata(&fake_metadata_ref);
1180        assert!(result.is_err());
1181    }
1182
1183    #[test]
1184    fn test_deduplicated_structs_equality() {
1185        let string_ref1 = DeduplicatedString {
1186            hash: 123,
1187            length: 10,
1188            ref_count: 1,
1189        };
1190        let string_ref2 = DeduplicatedString {
1191            hash: 123,
1192            length: 10,
1193            ref_count: 2, // Different ref count
1194        };
1195        let string_ref3 = DeduplicatedString {
1196            hash: 123,
1197            length: 10,
1198            ref_count: 1,
1199        };
1200
1201        assert_ne!(string_ref1, string_ref2); // Different ref count
1202        assert_eq!(string_ref1, string_ref3); // Same values
1203
1204        let stack_ref1 = DeduplicatedStackTrace {
1205            hash: 456,
1206            frame_count: 5,
1207            ref_count: 1,
1208        };
1209        let stack_ref2 = DeduplicatedStackTrace {
1210            hash: 456,
1211            frame_count: 5,
1212            ref_count: 1,
1213        };
1214        assert_eq!(stack_ref1, stack_ref2);
1215
1216        let meta_ref1 = DeduplicatedMetadata {
1217            hash: 789,
1218            entry_count: 3,
1219            ref_count: 1,
1220        };
1221        let meta_ref2 = DeduplicatedMetadata {
1222            hash: 789,
1223            entry_count: 3,
1224            ref_count: 1,
1225        };
1226        assert_eq!(meta_ref1, meta_ref2);
1227    }
1228
1229    #[test]
1230    fn test_global_deduplicator() {
1231        // Create a separate instance to avoid global state conflicts
1232        let config = DeduplicationConfig::default();
1233        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1234
1235        // Test basic functionality
1236        let test_string = "global_test";
1237        let result = deduplicator
1238            .deduplicate_string(test_string)
1239            .expect("Failed to deduplicate string");
1240        assert_eq!(result.length, test_string.len());
1241
1242        // Test that global function works (but don't use the result in tests)
1243        let _global = get_global_data_deduplicator();
1244    }
1245
1246    #[test]
1247    fn test_initialize_global_deduplicator() {
1248        let custom_config = DeduplicationConfig {
1249            enable_string_dedup: false,
1250            enable_stack_dedup: true,
1251            enable_metadata_dedup: false,
1252            max_cache_size: 1000,
1253            enable_compression: false,
1254            compression_threshold: 2048,
1255            enable_stats: false,
1256            cleanup_threshold: 0.5,
1257        };
1258
1259        // Create a local instance instead of global to avoid conflicts
1260        let deduplicator = ComprehensiveDataDeduplicator::new(custom_config);
1261
1262        // Test that the instance has the custom config
1263        assert!(!deduplicator.config.enable_string_dedup);
1264        assert!(deduplicator.config.enable_stack_dedup);
1265        assert!(!deduplicator.config.enable_metadata_dedup);
1266        assert_eq!(deduplicator.config.max_cache_size, 1000);
1267    }
1268
1269    #[test]
1270    fn test_stats_serialization() {
1271        let stats = DeduplicationStats {
1272            strings_deduplicated: 10,
1273            stack_traces_deduplicated: 5,
1274            metadata_deduplicated: 3,
1275            memory_saved_bytes: 1024,
1276            compression_ratio: 0.75,
1277            cache_hit_rate: 0.85,
1278            total_operations: 20,
1279            cleanup_operations: 2,
1280        };
1281
1282        let serialized = serde_json::to_string(&stats).expect("Failed to serialize stats");
1283        let deserialized: DeduplicationStats =
1284            serde_json::from_str(&serialized).expect("Failed to deserialize stats");
1285
1286        assert_eq!(deserialized.strings_deduplicated, 10);
1287        assert_eq!(deserialized.stack_traces_deduplicated, 5);
1288        assert_eq!(deserialized.metadata_deduplicated, 3);
1289        assert_eq!(deserialized.memory_saved_bytes, 1024);
1290        assert!((deserialized.compression_ratio - 0.75).abs() < f64::EPSILON);
1291        assert!((deserialized.cache_hit_rate - 0.85).abs() < f64::EPSILON);
1292        assert_eq!(deserialized.total_operations, 20);
1293        assert_eq!(deserialized.cleanup_operations, 2);
1294    }
1295
1296    #[test]
1297    fn test_deduplicated_refs_serialization() {
1298        let string_ref = DeduplicatedString {
1299            hash: 123456789,
1300            length: 42,
1301            ref_count: 5,
1302        };
1303
1304        let serialized =
1305            serde_json::to_string(&string_ref).expect("Failed to serialize string ref");
1306        let deserialized: DeduplicatedString =
1307            serde_json::from_str(&serialized).expect("Failed to deserialize string ref");
1308
1309        assert_eq!(deserialized.hash, 123456789);
1310        assert_eq!(deserialized.length, 42);
1311        assert_eq!(deserialized.ref_count, 5);
1312    }
1313
1314    #[test]
1315    fn test_string_deduplication_deadlock_fix() {
1316        // This test specifically verifies the deadlock bug fix
1317        // Previously, the second call would hang due to entry.get() after entry.insert()
1318        let config = DeduplicationConfig {
1319            enable_string_dedup: true,
1320            enable_stats: true,
1321            ..Default::default()
1322        };
1323
1324        let deduplicator = ComprehensiveDataDeduplicator::new(config);
1325        let test_string = "deadlock_test_string";
1326
1327        // First call - should create new entry
1328        let result1 = deduplicator
1329            .deduplicate_string(test_string)
1330            .expect("First call should succeed");
1331        assert_eq!(result1.ref_count, 1);
1332        assert_eq!(result1.length, test_string.len());
1333
1334        // Second call - this used to deadlock, now should work
1335        let result2 = deduplicator
1336            .deduplicate_string(test_string)
1337            .expect("Second call should succeed without deadlock");
1338        assert_eq!(result2.ref_count, 2);
1339        assert_eq!(result2.hash, result1.hash);
1340        assert_eq!(result2.length, result1.length);
1341
1342        // Third call - verify it continues to work
1343        let result3 = deduplicator
1344            .deduplicate_string(test_string)
1345            .expect("Third call should succeed");
1346        assert_eq!(result3.ref_count, 3);
1347        assert_eq!(result3.hash, result1.hash);
1348
1349        // Verify stats were updated correctly
1350        let stats = deduplicator.get_stats().expect("Should get stats");
1351        assert!(stats.strings_deduplicated >= 2); // At least 2 deduplication operations
1352    }
1353
1354    #[test]
1355    fn test_concurrent_string_deduplication_safety() {
1356        use std::sync::Arc;
1357        use std::thread;
1358
1359        let config = DeduplicationConfig {
1360            enable_string_dedup: true,
1361            enable_stats: true,
1362            ..Default::default()
1363        };
1364
1365        let deduplicator = Arc::new(ComprehensiveDataDeduplicator::new(config));
1366        let test_string = "concurrent_test_string";
1367        let num_threads = 4;
1368        let calls_per_thread = 10;
1369
1370        let mut handles = vec![];
1371
1372        // Spawn multiple threads that all try to deduplicate the same string
1373        for thread_id in 0..num_threads {
1374            let dedup_clone = Arc::clone(&deduplicator);
1375            let test_str = test_string.to_string();
1376
1377            let handle = thread::spawn(move || {
1378                let mut results = vec![];
1379                for i in 0..calls_per_thread {
1380                    let result = dedup_clone
1381                        .deduplicate_string(&test_str)
1382                        .unwrap_or_else(|_| panic!("Thread {thread_id} call {i} should succeed"));
1383                    results.push(result);
1384                }
1385                results
1386            });
1387            handles.push(handle);
1388        }
1389
1390        // Collect all results
1391        let mut all_results = vec![];
1392        for handle in handles {
1393            let thread_results = handle.join().expect("Thread should complete successfully");
1394            all_results.extend(thread_results);
1395        }
1396
1397        // Verify all results have the same hash (same string)
1398        let first_hash = all_results[0].hash;
1399        for result in &all_results {
1400            assert_eq!(result.hash, first_hash);
1401            assert_eq!(result.length, test_string.len());
1402            assert!(result.ref_count > 0);
1403        }
1404
1405        // The final ref_count should be the total number of calls
1406        let final_result = deduplicator
1407            .deduplicate_string(test_string)
1408            .expect("Final call should succeed");
1409        let expected_final_count = (num_threads * calls_per_thread) + 1;
1410        assert_eq!(final_result.ref_count, expected_final_count);
1411    }
1412}