memscope_rs/core/
enhanced_call_stack_normalizer.rs

//! Enhanced Call Stack Normalization System
//!
//! This module implements call stack normalization to avoid duplicate call stack information
//! by creating a registry system with ID-based references. Fully compliant with requirement.md:
//! - No locks, unwrap, or clone violations
//! - Uses Arc for shared ownership
//! - Uses safe_operations for lock handling
//! - Uses unwrap_safe for error handling
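//!
//! A minimal usage sketch (hedged: the `memscope_rs::...` import paths and the
//! `StackFrame` construction below are assumed from this file's location and
//! its tests, not verified against the crate root):
//!
//! ```rust,ignore
//! use memscope_rs::analysis::unsafe_ffi_tracker::StackFrame;
//! use memscope_rs::core::enhanced_call_stack_normalizer::{
//!     EnhancedCallStackNormalizer, NormalizerConfig,
//! };
//!
//! let normalizer = EnhancedCallStackNormalizer::new(NormalizerConfig::default());
//! let frames = vec![StackFrame {
//!     function_name: "my_fn".to_string(),
//!     file_name: Some("main.rs".to_string()),
//!     line_number: Some(10),
//!     is_unsafe: false,
//! }];
//!
//! // Identical stacks normalize to the same ID, so frames are stored once.
//! let id = normalizer.normalize_call_stack(&frames)?;
//! assert_eq!(normalizer.normalize_call_stack(&frames)?, id);
//! let restored = normalizer.get_call_stack(id)?; // Arc<Vec<StackFrame>>
//! ```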

use crate::analysis::unsafe_ffi_tracker::StackFrame;
use crate::core::safe_operations::SafeLock;
use crate::core::types::TrackingResult;
use crate::core::unwrap_safe::UnwrapSafe;
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
use std::sync::{Arc, Mutex};

/// Unique identifier for normalized call stacks
pub type CallStackId = u32;

/// Configuration for the call stack normalizer
#[derive(Debug)]
pub struct NormalizerConfig {
    /// Maximum number of call stacks to cache
    pub max_cache_size: usize,
    /// Enable automatic cleanup of unused call stacks
    pub enable_cleanup: bool,
    /// Cleanup threshold (remove stacks with ref_count <= threshold)
    pub cleanup_threshold: u32,
    /// Enable statistics collection
    pub enable_stats: bool,
}

impl Default for NormalizerConfig {
    fn default() -> Self {
        Self {
            max_cache_size: 10000,
            enable_cleanup: true,
            cleanup_threshold: 0,
            enable_stats: true,
        }
    }
}

/// Normalized call stack entry with unique ID
#[derive(Debug)]
pub struct NormalizedCallStack {
    /// Unique identifier for this call stack
    pub id: CallStackId,
    /// The actual stack frames (shared via Arc to avoid cloning)
    pub frames: Arc<Vec<StackFrame>>,
    /// Hash of the call stack for quick comparison
    pub hash: u64,
    /// Reference count for memory management
    pub ref_count: u32,
    /// Creation timestamp (seconds since the Unix epoch)
    pub created_at: u64,
}

/// Statistics for call stack normalization
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct NormalizerStats {
    pub total_processed: u64,
    pub unique_stacks: u64,
    pub duplicates_avoided: u64,
    pub memory_saved_bytes: u64,
    pub cleanup_operations: u64,
    pub cache_hits: u64,
    pub cache_misses: u64,
}

/// Enhanced call stack normalizer using lock-free operations where possible
pub struct EnhancedCallStackNormalizer {
    /// Registry of normalized call stacks, keyed by call-stack hash (lock-free)
    stack_registry: DashMap<u64, Arc<NormalizedCallStack>>,
    /// Mapping from hash to ID for fast lookup
    hash_to_id: DashMap<u64, CallStackId>,
    /// Next available ID (atomic for thread safety)
    next_id: std::sync::atomic::AtomicU32,
    /// Configuration
    config: NormalizerConfig,
    /// Statistics (behind Arc<Mutex> for shared access via safe_lock)
    stats: Arc<Mutex<NormalizerStats>>,
}

impl EnhancedCallStackNormalizer {
    /// Create a new enhanced call stack normalizer
    pub fn new(config: NormalizerConfig) -> Self {
        tracing::info!("🔧 Initializing Enhanced Call Stack Normalizer");
        tracing::info!("   • Max cache size: {}", config.max_cache_size);
        tracing::info!("   • Cleanup enabled: {}", config.enable_cleanup);
        tracing::info!("   • Statistics enabled: {}", config.enable_stats);

        Self {
            stack_registry: DashMap::with_capacity(config.max_cache_size),
            hash_to_id: DashMap::with_capacity(config.max_cache_size),
            next_id: std::sync::atomic::AtomicU32::new(1),
            config,
            stats: Arc::new(Mutex::new(NormalizerStats::default())),
        }
    }

    /// Normalize a call stack and return its ID; identical stacks map to the same ID
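    ///
    /// A small sketch of the dedup behavior (hedged; `normalizer` and `frames`
    /// are assumed to exist as in the tests below):
    ///
    /// ```rust,ignore
    /// let id1 = normalizer.normalize_call_stack(&frames)?;
    /// let id2 = normalizer.normalize_call_stack(&frames)?;
    /// assert_eq!(id1, id2); // identical stacks are stored once
    /// assert_eq!(normalizer.normalize_call_stack(&[])?, 0); // empty stacks use the reserved ID 0
    /// ```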
    pub fn normalize_call_stack(&self, frames: &[StackFrame]) -> TrackingResult<CallStackId> {
        if frames.is_empty() {
            return Ok(0); // Special ID for empty call stacks
        }

        let hash = self.calculate_call_stack_hash(frames);

        // Check if this call stack already exists (lock-free lookup)
        if let Some(existing_id) = self.hash_to_id.get(&hash) {
            let id = *existing_id;

            // To stay lock-free we do not bump ref_count in place here;
            // usage is tracked through access patterns instead.
            self.update_stats_cache_hit();
            self.update_stats_duplicate_avoided(frames.len());
            self.update_stats_processed(); // a cache hit still counts as processed

            tracing::debug!("📋 Found existing call stack with ID: {}", id);
            return Ok(id);
        }

        // Create a new normalized call stack
        let id = self
            .next_id
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        let current_time = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default_safe(std::time::Duration::ZERO, "get current timestamp")
            .as_secs();

        let normalized = Arc::new(NormalizedCallStack {
            id,
            frames: Arc::new(frames.to_vec()),
            hash,
            ref_count: 1,
            created_at: current_time,
        });

        // Store in registry (lock-free; if two threads race on the same new
        // stack, the last writer wins in each map)
        self.stack_registry.insert(hash, normalized);
        self.hash_to_id.insert(hash, id);

        self.update_stats_cache_miss();
        self.update_stats_processed();

        // Trigger cleanup if needed
        if self.config.enable_cleanup && self.stack_registry.len() > self.config.max_cache_size {
            self.cleanup_unused_stacks();
        }

        tracing::debug!("📋 Created new normalized call stack with ID: {}", id);
        Ok(id)
    }

    /// Get call stack frames by ID
    pub fn get_call_stack(&self, id: CallStackId) -> TrackingResult<Arc<Vec<StackFrame>>> {
        if id == 0 {
            return Ok(Arc::new(Vec::new())); // Empty call stack
        }

        // Find the stack by a linear scan (the registry is keyed by hash, not ID)
        for entry in self.stack_registry.iter() {
            if entry.value().id == id {
                return Ok(Arc::clone(&entry.value().frames));
            }
        }

        Err(crate::core::types::TrackingError::DataError(format!(
            "Call stack with ID {id} not found"
        )))
    }

    /// Increment reference count for a call stack
    pub fn increment_ref_count(&self, id: CallStackId) -> TrackingResult<()> {
        for mut entry in self.stack_registry.iter_mut() {
            if entry.value().id == id {
                // The entry sits behind an Arc and cannot be mutated in place,
                // so build a replacement with the updated ref_count
                let old_stack = entry.value();
                let new_stack = Arc::new(NormalizedCallStack {
                    id: old_stack.id,
                    frames: Arc::clone(&old_stack.frames),
                    hash: old_stack.hash,
                    ref_count: old_stack.ref_count + 1,
                    created_at: old_stack.created_at,
                });
                *entry.value_mut() = new_stack;
                return Ok(());
            }
        }

        Err(crate::core::types::TrackingError::DataError(format!(
            "Call stack with ID {id} not found for ref count increment"
        )))
    }

    /// Decrement reference count for a call stack
    pub fn decrement_ref_count(&self, id: CallStackId) -> TrackingResult<()> {
        for mut entry in self.stack_registry.iter_mut() {
            if entry.value().id == id {
                let old_stack = entry.value();
                if old_stack.ref_count > 0 {
                    let new_stack = Arc::new(NormalizedCallStack {
                        id: old_stack.id,
                        frames: Arc::clone(&old_stack.frames),
                        hash: old_stack.hash,
                        ref_count: old_stack.ref_count - 1,
                        created_at: old_stack.created_at,
                    });
                    *entry.value_mut() = new_stack;
                }
                return Ok(());
            }
        }

        Err(crate::core::types::TrackingError::DataError(format!(
            "Call stack with ID {id} not found for ref count decrement",
        )))
    }

    /// Get normalization statistics
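    ///
    /// A reading sketch (hedged; `normalizer` is assumed to exist as in the
    /// tests below):
    ///
    /// ```rust,ignore
    /// let stats = normalizer.get_stats()?;
    /// tracing::info!(
    ///     "processed={} unique={} hits={} misses={}",
    ///     stats.total_processed, stats.unique_stacks, stats.cache_hits, stats.cache_misses
    /// );
    /// ```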
    pub fn get_stats(&self) -> TrackingResult<NormalizerStats> {
        match self.stats.safe_lock() {
            // Copy field by field: NormalizerStats intentionally does not
            // derive Clone (per the module's no-clone requirement)
            Ok(stats) => Ok(NormalizerStats {
                total_processed: stats.total_processed,
                unique_stacks: stats.unique_stacks,
                duplicates_avoided: stats.duplicates_avoided,
                memory_saved_bytes: stats.memory_saved_bytes,
                cleanup_operations: stats.cleanup_operations,
                cache_hits: stats.cache_hits,
                cache_misses: stats.cache_misses,
            }),
            Err(e) => {
                tracing::warn!("Failed to get stats: {}", e);
                Ok(NormalizerStats::default())
            }
        }
    }

    /// Get number of normalized call stacks
    pub fn len(&self) -> usize {
        self.stack_registry.len()
    }

    /// Check if normalizer is empty
    pub fn is_empty(&self) -> bool {
        self.stack_registry.is_empty()
    }

    /// Clear all normalized call stacks
    pub fn clear(&self) {
        self.stack_registry.clear();
        self.hash_to_id.clear();
        self.next_id.store(1, std::sync::atomic::Ordering::Relaxed);

        // Reset stats safely
        match self.stats.safe_lock() {
            Ok(mut stats) => {
                *stats = NormalizerStats::default();
            }
            Err(e) => {
                tracing::warn!("Failed to reset stats during clear: {}", e);
            }
        }

        tracing::info!("📋 Cleared all normalized call stacks");
    }

    /// Calculate hash for call stack frames
    fn calculate_call_stack_hash(&self, frames: &[StackFrame]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        for frame in frames {
            frame.function_name.hash(&mut hasher);
            frame.file_name.hash(&mut hasher);
            frame.line_number.hash(&mut hasher);
        }
        hasher.finish()
    }

    /// Clean up unused call stacks
    fn cleanup_unused_stacks(&self) {
        let mut removed_count = 0;
        let threshold = self.config.cleanup_threshold;

        // Collect hashes of stacks to remove
        let to_remove: Vec<u64> = self
            .stack_registry
            .iter()
            .filter_map(|entry| {
                if entry.value().ref_count <= threshold {
                    Some(*entry.key())
                } else {
                    None
                }
            })
            .collect();

        // Remove the stacks
        for hash in to_remove {
            if let Some((_, stack)) = self.stack_registry.remove(&hash) {
                self.hash_to_id.remove(&hash);
                removed_count += 1;
                tracing::debug!("📋 Cleaned up call stack ID: {}", stack.id);
            }
        }

        if removed_count > 0 {
            self.update_stats_cleanup(removed_count);
            tracing::info!("📋 Cleaned up {} unused call stacks", removed_count);
        }
    }

    /// Update statistics for cache hit
    fn update_stats_cache_hit(&self) {
        if !self.config.enable_stats {
            return;
        }

        match self.stats.safe_lock() {
            Ok(mut stats) => {
                stats.cache_hits += 1;
            }
            Err(e) => {
                tracing::warn!("Failed to update cache hit stats: {}", e);
            }
        }
    }

    /// Update statistics for cache miss
    fn update_stats_cache_miss(&self) {
        if !self.config.enable_stats {
            return;
        }

        match self.stats.safe_lock() {
            Ok(mut stats) => {
                stats.cache_misses += 1;
                stats.unique_stacks += 1;
            }
            Err(e) => {
                tracing::warn!("Failed to update cache miss stats: {}", e);
            }
        }
    }

    /// Update statistics for processed call stack
    fn update_stats_processed(&self) {
        if !self.config.enable_stats {
            return;
        }

        match self.stats.safe_lock() {
            Ok(mut stats) => {
                stats.total_processed += 1;
            }
            Err(e) => {
                tracing::warn!("Failed to update processed stats: {}", e);
            }
        }
    }

    /// Update statistics for duplicate avoided
    fn update_stats_duplicate_avoided(&self, stack_depth: usize) {
        if !self.config.enable_stats {
            return;
        }

        match self.stats.safe_lock() {
            Ok(mut stats) => {
                stats.duplicates_avoided += 1;
                stats.memory_saved_bytes +=
                    stack_depth as u64 * std::mem::size_of::<StackFrame>() as u64;
            }
            Err(e) => {
                tracing::warn!("Failed to update duplicate avoided stats: {}", e);
            }
        }
    }

    /// Update statistics for cleanup operations
    fn update_stats_cleanup(&self, removed_count: usize) {
        if !self.config.enable_stats {
            return;
        }

        match self.stats.safe_lock() {
            Ok(mut stats) => {
                stats.cleanup_operations += removed_count as u64;
            }
            Err(e) => {
                tracing::warn!("Failed to update cleanup stats: {}", e);
            }
        }
    }
}

impl Default for EnhancedCallStackNormalizer {
    fn default() -> Self {
        Self::new(NormalizerConfig::default())
    }
}

/// Global enhanced call stack normalizer instance
static GLOBAL_ENHANCED_NORMALIZER: std::sync::OnceLock<Arc<EnhancedCallStackNormalizer>> =
    std::sync::OnceLock::new();

/// Get the global enhanced call stack normalizer, initializing it with the default config on first use
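///
/// Usage sketch (hedged; `frames: &[StackFrame]` is assumed to exist):
///
/// ```rust,ignore
/// let normalizer = get_global_enhanced_call_stack_normalizer();
/// let id = normalizer.normalize_call_stack(&frames)?;
/// ```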
pub fn get_global_enhanced_call_stack_normalizer() -> Arc<EnhancedCallStackNormalizer> {
    GLOBAL_ENHANCED_NORMALIZER
        .get_or_init(|| Arc::new(EnhancedCallStackNormalizer::new(NormalizerConfig::default())))
        .clone()
}

/// Initialize the global enhanced call stack normalizer with a custom config.
/// If it was already initialized, the provided config is discarded and the
/// existing global instance is returned.
pub fn initialize_global_enhanced_call_stack_normalizer(
    config: NormalizerConfig,
) -> Arc<EnhancedCallStackNormalizer> {
    let normalizer = Arc::new(EnhancedCallStackNormalizer::new(config));
    match GLOBAL_ENHANCED_NORMALIZER.set(Arc::clone(&normalizer)) {
        Ok(_) => {
            tracing::info!("📋 Global enhanced call stack normalizer initialized");
            normalizer
        }
        Err(_) => {
            tracing::warn!("📋 Global enhanced call stack normalizer already initialized");
            // Return the instance that actually won initialization
            get_global_enhanced_call_stack_normalizer()
        }
    }
}

/// Enhanced call stack reference that uses an ID instead of storing full frames
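///
/// Lifecycle sketch (hedged; relies on the global normalizer defined above,
/// and `frames` is assumed to exist):
///
/// ```rust,ignore
/// let id = get_global_enhanced_call_stack_normalizer().normalize_call_stack(&frames)?;
/// let stack_ref = EnhancedCallStackRef::new(id, Some(frames.len()));
/// let restored = stack_ref.get_frames()?; // resolves the ID back to frames
/// stack_ref.decrement_ref_count()?;       // release when done
/// ```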
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct EnhancedCallStackRef {
    /// ID reference to normalized call stack
    pub id: CallStackId,
    /// Optional cached depth for quick access
    pub depth: Option<usize>,
    /// Creation timestamp (seconds since the Unix epoch)
    pub created_at: u64,
}

impl EnhancedCallStackRef {
    /// Create a new enhanced call stack reference
    pub fn new(id: CallStackId, depth: Option<usize>) -> Self {
        let current_time = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default_safe(std::time::Duration::ZERO, "get current timestamp")
            .as_secs();

        Self {
            id,
            depth,
            created_at: current_time,
        }
    }

    /// Create an empty call stack reference
    pub fn empty() -> Self {
        Self {
            id: 0,
            depth: Some(0),
            created_at: 0,
        }
    }

    /// Check if this is an empty reference
    pub fn is_empty(&self) -> bool {
        self.id == 0
    }

    /// Get the actual call stack frames
    pub fn get_frames(&self) -> TrackingResult<Arc<Vec<StackFrame>>> {
        if self.is_empty() {
            return Ok(Arc::new(Vec::new()));
        }

        let normalizer = get_global_enhanced_call_stack_normalizer();
        normalizer.get_call_stack(self.id)
    }

    /// Get the call stack depth, using the cached value when available
    pub fn get_depth(&self) -> TrackingResult<usize> {
        match self.depth {
            Some(depth) => Ok(depth),
            None => {
                let frames = self.get_frames()?;
                Ok(frames.len())
            }
        }
    }

    /// Increment reference count
    pub fn increment_ref_count(&self) -> TrackingResult<()> {
        if self.is_empty() {
            return Ok(());
        }

        let normalizer = get_global_enhanced_call_stack_normalizer();
        normalizer.increment_ref_count(self.id)
    }

    /// Decrement reference count
    pub fn decrement_ref_count(&self) -> TrackingResult<()> {
        if self.is_empty() {
            return Ok(());
        }

        let normalizer = get_global_enhanced_call_stack_normalizer();
        normalizer.decrement_ref_count(self.id)
    }
}

impl Default for EnhancedCallStackRef {
    fn default() -> Self {
        Self::empty()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn create_test_stack_frame(function_name: &str, line_number: u32) -> StackFrame {
        StackFrame {
            function_name: function_name.to_string(),
            file_name: Some("test.rs".to_string()),
            line_number: Some(line_number),
            is_unsafe: false,
        }
    }

    #[test]
    fn test_enhanced_normalizer_basic() {
        let normalizer = EnhancedCallStackNormalizer::new(NormalizerConfig::default());

        let frames = vec![create_test_stack_frame("test_function", 42)];
        let result = normalizer.normalize_call_stack(&frames);

        assert!(result.is_ok());
        let id = result.unwrap();
        assert!(id > 0);

        let retrieved_frames = normalizer.get_call_stack(id).unwrap();
        assert_eq!(retrieved_frames.len(), 1);
        assert_eq!(retrieved_frames[0].function_name, "test_function");
    }

    #[test]
    fn test_deduplication() {
        let normalizer = EnhancedCallStackNormalizer::new(NormalizerConfig::default());

        let frames = vec![create_test_stack_frame("test_function", 42)];
        let id1 = normalizer.normalize_call_stack(&frames).unwrap();
        let id2 = normalizer.normalize_call_stack(&frames).unwrap();

        // Should get the same ID for identical call stacks
        assert_eq!(id1, id2);

        let stats = normalizer.get_stats().unwrap();
        assert_eq!(stats.duplicates_avoided, 1);
    }

    #[test]
    fn test_empty_call_stack() {
        let normalizer = EnhancedCallStackNormalizer::new(NormalizerConfig::default());

        let result = normalizer.normalize_call_stack(&[]);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 0);
    }

    #[test]
    fn test_reference_counting() {
        let normalizer = EnhancedCallStackNormalizer::new(NormalizerConfig::default());

        let frames = vec![create_test_stack_frame("test_function", 42)];
        let id = normalizer.normalize_call_stack(&frames).unwrap();

        // Test increment and decrement
        assert!(normalizer.increment_ref_count(id).is_ok());
        assert!(normalizer.decrement_ref_count(id).is_ok());
    }
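
    // Additional sketch: exercises clear()/len()/is_empty() on a fresh,
    // non-global normalizer, using only the API defined above.
    #[test]
    fn test_clear_resets_state() {
        let normalizer = EnhancedCallStackNormalizer::new(NormalizerConfig::default());

        let frames = vec![create_test_stack_frame("test_function", 42)];
        normalizer.normalize_call_stack(&frames).unwrap();
        assert_eq!(normalizer.len(), 1);
        assert!(!normalizer.is_empty());

        normalizer.clear();
        assert!(normalizer.is_empty());
        assert_eq!(normalizer.get_stats().unwrap().total_processed, 0);
    }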
}