memscope_rs/core/call_stack_normalizer.rs

//! Call Stack Normalization System
//!
//! This module deduplicates call stack information: identical stacks are stored
//! once in a registry and referenced elsewhere by a compact ID.
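//!
//! # Example
//!
//! A minimal usage sketch (`frames` stands in for a captured `Vec<StackFrame>`;
//! see the tests at the bottom of this file for a concrete construction):
//!
//! ```rust,ignore
//! use memscope_rs::core::call_stack_normalizer::{CallStackNormalizer, NormalizerConfig};
//!
//! let normalizer = CallStackNormalizer::new(NormalizerConfig::default());
//! let id = normalizer.normalize_call_stack(&frames)?;      // first sighting: new entry
//! let same_id = normalizer.normalize_call_stack(&frames)?; // duplicate: same ID, ref count bumped
//! assert_eq!(id, same_id);
//! ```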

use crate::analysis::unsafe_ffi_tracker::StackFrame;
use crate::core::types::{TrackingError, TrackingResult};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

/// Unique identifier for normalized call stacks
pub type CallStackId = u32;

/// Normalized call stack entry with unique ID
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NormalizedCallStack {
    /// Unique identifier for this call stack
    pub id: CallStackId,
    /// The actual stack frames
    pub frames: Vec<StackFrame>,
    /// Hash of the call stack for quick comparison
    pub hash: u64,
    /// Reference count for memory management
    pub ref_count: u32,
    /// Creation timestamp (seconds since Unix epoch)
    pub created_at: u64,
}

/// Call stack normalizer registry
pub struct CallStackNormalizer {
    /// Registry mapping call stack hashes to normalized entries
    stack_registry: Arc<Mutex<HashMap<u64, NormalizedCallStack>>>,
    /// ID-to-hash mapping for quick lookups
    id_to_hash: Arc<Mutex<HashMap<CallStackId, u64>>>,
    /// Next available ID counter
    next_id: Arc<Mutex<CallStackId>>,
    /// Configuration for the normalizer
    config: NormalizerConfig,
    /// Statistics tracking
    stats: Arc<Mutex<NormalizerStats>>,
}

/// Configuration for call stack normalizer
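///
/// All fields have serviceable defaults (see the `Default` impl below). A
/// hedged override sketch using struct-update syntax:
///
/// ```rust,ignore
/// let config = NormalizerConfig {
///     max_registry_size: 4096, // illustrative value, not a recommendation
///     ..Default::default()
/// };
/// ```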
#[derive(Debug, Clone)]
pub struct NormalizerConfig {
    /// Maximum number of call stacks to keep in the registry
    pub max_registry_size: usize,
    /// Enable automatic cleanup of unused call stacks
    pub enable_auto_cleanup: bool,
    /// Minimum reference count an entry needs to survive cleanup
    pub min_ref_count_for_cleanup: u32,
    /// Enable detailed statistics tracking
    pub enable_statistics: bool,
}

impl Default for NormalizerConfig {
    fn default() -> Self {
        Self {
            max_registry_size: 10_000,
            enable_auto_cleanup: true,
            min_ref_count_for_cleanup: 1,
            enable_statistics: true,
        }
    }
}

/// Statistics for call stack normalizer
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NormalizerStats {
    /// Total call stacks processed
    pub total_processed: usize,
    /// Number of unique call stacks
    pub unique_stacks: usize,
    /// Number of duplicate call stacks avoided
    pub duplicates_avoided: usize,
    /// Memory saved by normalization (estimated bytes)
    pub memory_saved_bytes: usize,
    /// Registry cleanup operations performed
    pub cleanup_operations: usize,
    /// Average call stack depth
    pub average_stack_depth: f64,
    /// Statistics collection start time (seconds since Unix epoch)
    pub stats_start_time: u64,
}

impl CallStackNormalizer {
    /// Create a new call stack normalizer
    pub fn new(config: NormalizerConfig) -> Self {
        tracing::info!("📚 Initializing Call Stack Normalizer");
        tracing::info!("   • Max registry size: {}", config.max_registry_size);
        tracing::info!("   • Auto cleanup: {}", config.enable_auto_cleanup);
        tracing::info!("   • Statistics: {}", config.enable_statistics);

        Self {
            stack_registry: Arc::new(Mutex::new(HashMap::new())),
            id_to_hash: Arc::new(Mutex::new(HashMap::new())),
            next_id: Arc::new(Mutex::new(1)),
            config,
            stats: Arc::new(Mutex::new(NormalizerStats {
                stats_start_time: std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs(),
                ..Default::default()
            })),
        }
    }

    /// Normalize a call stack and return its unique ID
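    ///
    /// If an identical stack is already registered, its existing ID is
    /// returned and its reference count is bumped; otherwise the frames are
    /// stored under a fresh ID. An empty slice is rejected. Sketch:
    ///
    /// ```rust,ignore
    /// let id = normalizer.normalize_call_stack(&frames)?;
    /// let frames_back = normalizer.get_call_stack(id)?;
    /// assert_eq!(frames_back.len(), frames.len());
    /// ```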
    pub fn normalize_call_stack(&self, frames: &[StackFrame]) -> TrackingResult<CallStackId> {
        if frames.is_empty() {
            return Err(TrackingError::InvalidPointer(
                "Empty call stack".to_string(),
            ));
        }

        let hash = self.calculate_call_stack_hash(frames);

        // Check if this call stack already exists
        if let Ok(mut registry) = self.stack_registry.lock() {
            if let Some(existing) = registry.get_mut(&hash) {
                // Increment reference count
                existing.ref_count += 1;

                // Update statistics
                self.update_stats_duplicate_avoided(frames.len());

                tracing::debug!("📚 Reused existing call stack ID: {}", existing.id);
                return Ok(existing.id);
            }

            // Create new normalized call stack
            let id = self.get_next_id()?;
            let normalized = NormalizedCallStack {
                id,
                frames: frames.to_vec(),
                hash,
                ref_count: 1,
                created_at: std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs(),
            };

            // Check registry size limit (note: if cleanup removes nothing,
            // the insert below can still push the registry past the limit)
            if registry.len() >= self.config.max_registry_size {
                if self.config.enable_auto_cleanup {
                    self.cleanup_registry_internal(&mut registry)?;
                } else {
                    return Err(TrackingError::ResourceExhausted(
                        "Call stack registry full".to_string(),
                    ));
                }
            }

            // Store in registry
            registry.insert(hash, normalized);

            // Update ID mapping
            if let Ok(mut id_map) = self.id_to_hash.lock() {
                id_map.insert(id, hash);
            }

            // Update statistics
            self.update_stats_new_stack(frames.len());

            tracing::debug!("📚 Created new call stack ID: {} (hash: {:x})", id, hash);
            Ok(id)
        } else {
            Err(TrackingError::LockContention(
                "Failed to lock registry".to_string(),
            ))
        }
    }

    /// Get call stack by ID
    pub fn get_call_stack(&self, id: CallStackId) -> TrackingResult<Vec<StackFrame>> {
        // Resolve the ID to its hash
        let hash = if let Ok(id_map) = self.id_to_hash.lock() {
            id_map.get(&id).copied().ok_or_else(|| {
                TrackingError::InvalidPointer(format!("Invalid call stack ID: {id}"))
            })?
        } else {
            return Err(TrackingError::LockContention(
                "Failed to lock ID mapping".to_string(),
            ));
        };

        // Look up the call stack in the registry
        if let Ok(registry) = self.stack_registry.lock() {
            registry
                .get(&hash)
                .map(|normalized| normalized.frames.clone())
                .ok_or_else(|| {
                    TrackingError::InvalidPointer(format!("Call stack not found for ID: {id}"))
                })
        } else {
            Err(TrackingError::LockContention(
                "Failed to lock registry".to_string(),
            ))
        }
    }

    /// Increment the reference count for a call stack
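    ///
    /// Call this when another record starts sharing `id`, and pair it with
    /// `decrement_ref_count` when that record is dropped, so auto-cleanup can
    /// reclaim stacks nothing references anymore. Sketch:
    ///
    /// ```rust,ignore
    /// normalizer.increment_ref_count(id)?; // a second record now shares the stack
    /// // ... later, when that record goes away:
    /// normalizer.decrement_ref_count(id)?;
    /// ```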
    pub fn increment_ref_count(&self, id: CallStackId) -> TrackingResult<()> {
        let hash = if let Ok(id_map) = self.id_to_hash.lock() {
            id_map.get(&id).copied().ok_or_else(|| {
                TrackingError::InvalidPointer(format!("Invalid call stack ID: {id}"))
            })?
        } else {
            return Err(TrackingError::LockContention(
                "Failed to lock ID mapping".to_string(),
            ));
        };

        if let Ok(mut registry) = self.stack_registry.lock() {
            if let Some(normalized) = registry.get_mut(&hash) {
                normalized.ref_count += 1;
                tracing::debug!(
                    "📚 Incremented ref count for ID: {} to {}",
                    id,
                    normalized.ref_count
                );
                Ok(())
            } else {
                Err(TrackingError::InvalidPointer(format!(
                    "Call stack not found for ID: {id}"
                )))
            }
        } else {
            Err(TrackingError::LockContention(
                "Failed to lock registry".to_string(),
            ))
        }
    }

    /// Decrement the reference count for a call stack
    pub fn decrement_ref_count(&self, id: CallStackId) -> TrackingResult<()> {
        let hash = if let Ok(id_map) = self.id_to_hash.lock() {
            id_map.get(&id).copied().ok_or_else(|| {
                TrackingError::InvalidPointer(format!("Invalid call stack ID: {id}"))
            })?
        } else {
            return Err(TrackingError::LockContention(
                "Failed to lock ID mapping".to_string(),
            ));
        };

        if let Ok(mut registry) = self.stack_registry.lock() {
            if let Some(normalized) = registry.get_mut(&hash) {
                // Saturate at zero instead of underflowing
                if normalized.ref_count > 0 {
                    normalized.ref_count -= 1;
                    tracing::debug!(
                        "📚 Decremented ref count for ID: {} to {}",
                        id,
                        normalized.ref_count
                    );
                }
                Ok(())
            } else {
                Err(TrackingError::InvalidPointer(format!(
                    "Call stack not found for ID: {id}"
                )))
            }
        } else {
            Err(TrackingError::LockContention(
                "Failed to lock registry".to_string(),
            ))
        }
    }

    /// Perform manual cleanup of unused call stacks
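    ///
    /// Removes every entry whose `ref_count` is below
    /// `min_ref_count_for_cleanup` and returns the number of entries dropped.
    /// Sketch:
    ///
    /// ```rust,ignore
    /// let removed = normalizer.cleanup_registry()?;
    /// tracing::debug!("dropped {removed} unused call stacks");
    /// ```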
    pub fn cleanup_registry(&self) -> TrackingResult<usize> {
        if let Ok(mut registry) = self.stack_registry.lock() {
            self.cleanup_registry_internal(&mut registry)
        } else {
            Err(TrackingError::LockContention(
                "Failed to lock registry".to_string(),
            ))
        }
    }

    /// Get normalizer statistics
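    ///
    /// Returns a snapshot copy; if the stats lock cannot be acquired, an error
    /// is logged and default (zeroed) statistics are returned. Sketch:
    ///
    /// ```rust,ignore
    /// let stats = normalizer.get_stats();
    /// println!("deduped {}/{} stacks", stats.duplicates_avoided, stats.total_processed);
    /// ```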
    pub fn get_stats(&self) -> NormalizerStats {
        if let Ok(stats) = self.stats.lock() {
            stats.clone()
        } else {
            tracing::error!("Failed to lock stats");
            NormalizerStats::default()
        }
    }

    /// Get registry size
    pub fn get_registry_size(&self) -> usize {
        self.stack_registry.lock().map(|r| r.len()).unwrap_or(0)
    }

    /// Clear all call stacks (for testing)
    pub fn clear_registry(&self) {
        if let Ok(mut registry) = self.stack_registry.lock() {
            registry.clear();
        }
        if let Ok(mut id_map) = self.id_to_hash.lock() {
            id_map.clear();
        }
        if let Ok(mut next_id) = self.next_id.lock() {
            *next_id = 1;
        }
        if let Ok(mut stats) = self.stats.lock() {
            *stats = NormalizerStats {
                stats_start_time: stats.stats_start_time,
                ..Default::default()
            };
        }
        tracing::info!("🧹 Cleared call stack registry");
    }

    // Private helper methods

    fn calculate_call_stack_hash(&self, frames: &[StackFrame]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();

        // Hash each frame's key components; frame order matters, so the same
        // frames in a different order hash differently
        for frame in frames {
            frame.function_name.hash(&mut hasher);
            frame.file_name.hash(&mut hasher);
            frame.line_number.hash(&mut hasher);
            frame.is_unsafe.hash(&mut hasher);
        }

        hasher.finish()
    }

    fn get_next_id(&self) -> TrackingResult<CallStackId> {
        if let Ok(mut next_id) = self.next_id.lock() {
            let id = *next_id;
            // Wraps at u32::MAX instead of panicking
            *next_id = next_id.wrapping_add(1);
            Ok(id)
        } else {
            Err(TrackingError::LockContention(
                "Failed to lock next ID counter".to_string(),
            ))
        }
    }

    fn cleanup_registry_internal(
        &self,
        registry: &mut HashMap<u64, NormalizedCallStack>,
    ) -> TrackingResult<usize> {
        let initial_size = registry.len();
        let min_ref_count = self.config.min_ref_count_for_cleanup;

        // Remove entries with low reference counts
        registry.retain(|_, normalized| normalized.ref_count >= min_ref_count);

        // Drop ID mappings whose entries no longer exist
        if let Ok(mut id_map) = self.id_to_hash.lock() {
            id_map.retain(|_, hash| registry.contains_key(hash));
        }

        let removed_count = initial_size - registry.len();

        // Update statistics
        if let Ok(mut stats) = self.stats.lock() {
            stats.cleanup_operations += 1;
            stats.unique_stacks = registry.len();
        }

        tracing::info!(
            "🧹 Cleaned up {} unused call stacks from registry",
            removed_count
        );
        Ok(removed_count)
    }

    fn update_stats_new_stack(&self, stack_depth: usize) {
        if !self.config.enable_statistics {
            return;
        }

        if let Ok(mut stats) = self.stats.lock() {
            stats.total_processed += 1;
            stats.unique_stacks += 1;

            // Update running average of stack depth (unique_stacks was just
            // incremented, so it is always >= 1 here)
            let total_depth =
                stats.average_stack_depth * (stats.unique_stacks - 1) as f64 + stack_depth as f64;
            stats.average_stack_depth = total_depth / stats.unique_stacks as f64;

            // Storing a new unique stack costs memory rather than saving it;
            // savings are counted only when a duplicate is avoided
        }
    }

    fn update_stats_duplicate_avoided(&self, stack_depth: usize) {
        if !self.config.enable_statistics {
            return;
        }

        if let Ok(mut stats) = self.stats.lock() {
            stats.total_processed += 1;
            stats.duplicates_avoided += 1;

            // Estimate memory saved by not storing a second copy of the frames
            // (rough: counts the frame structs only, not their heap strings)
            stats.memory_saved_bytes += stack_depth * std::mem::size_of::<StackFrame>();
        }
    }
}

impl Default for CallStackNormalizer {
    fn default() -> Self {
        Self::new(NormalizerConfig::default())
    }
}

/// Global call stack normalizer instance
static GLOBAL_NORMALIZER: std::sync::OnceLock<Arc<CallStackNormalizer>> =
    std::sync::OnceLock::new();

/// Get the global call stack normalizer instance
pub fn get_global_call_stack_normalizer() -> Arc<CallStackNormalizer> {
    GLOBAL_NORMALIZER
        .get_or_init(|| Arc::new(CallStackNormalizer::new(NormalizerConfig::default())))
        .clone()
}

/// Initialize the global call stack normalizer with a custom config
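///
/// Call this once at startup, before the first use of
/// `get_global_call_stack_normalizer()`. If the global instance was already
/// initialized, a warning is logged and the existing instance is returned.
/// Sketch:
///
/// ```rust,ignore
/// let normalizer = initialize_global_call_stack_normalizer(NormalizerConfig {
///     max_registry_size: 50_000, // illustrative value, not a recommendation
///     ..Default::default()
/// });
/// ```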
pub fn initialize_global_call_stack_normalizer(
    config: NormalizerConfig,
) -> Arc<CallStackNormalizer> {
    let normalizer = Arc::new(CallStackNormalizer::new(config));
    if GLOBAL_NORMALIZER.set(normalizer.clone()).is_err() {
        tracing::warn!("Global call stack normalizer already initialized; new config ignored");
        // Hand back the instance that is actually registered globally, so
        // callers never hold a normalizer the rest of the crate ignores
        return get_global_call_stack_normalizer();
    }
    normalizer
}

/// Call stack reference that uses an ID instead of storing full frames
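///
/// Storing a `CallStackRef` instead of a full `Vec<StackFrame>` keeps
/// per-allocation records small; frames are re-fetched on demand from the
/// global normalizer. Sketch:
///
/// ```rust,ignore
/// let id = get_global_call_stack_normalizer().normalize_call_stack(&frames)?;
/// let stack_ref = CallStackRef::new(id, Some(frames.len()));
/// let frames_again = stack_ref.get_frames()?; // resolves via the global registry
/// ```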
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallStackRef {
    /// ID reference to normalized call stack
    pub id: CallStackId,
    /// Optional cached depth for quick access
    pub depth: Option<usize>,
    /// Creation timestamp (seconds since Unix epoch)
    pub created_at: u64,
}

impl CallStackRef {
    /// Create new call stack reference
    pub fn new(id: CallStackId, depth: Option<usize>) -> Self {
        Self {
            id,
            depth,
            created_at: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
        }
    }

    /// Get the actual call stack frames
    pub fn get_frames(&self) -> TrackingResult<Vec<StackFrame>> {
        let normalizer = get_global_call_stack_normalizer();
        normalizer.get_call_stack(self.id)
    }

    /// Get call stack depth (cached, or calculated from the frames)
    pub fn get_depth(&self) -> TrackingResult<usize> {
        match self.depth {
            Some(depth) => Ok(depth),
            None => {
                let frames = self.get_frames()?;
                Ok(frames.len())
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn create_test_stack_frame(function_name: &str, line: u32) -> StackFrame {
        StackFrame {
            function_name: function_name.to_string(),
            file_name: Some("test.rs".to_string()),
            line_number: Some(line),
            is_unsafe: false,
        }
    }

    #[test]
    fn test_call_stack_normalization() {
        let normalizer = CallStackNormalizer::new(NormalizerConfig::default());

        let frames1 = vec![
            create_test_stack_frame("main", 10),
            create_test_stack_frame("foo", 20),
        ];

        let frames2 = vec![
            create_test_stack_frame("main", 10),
            create_test_stack_frame("foo", 20),
        ];

        // First normalization should create a new entry
        let id1 = normalizer
            .normalize_call_stack(&frames1)
            .expect("Failed to normalize call stack");
        assert_eq!(id1, 1);

        // Second normalization with the same frames should return the same ID
        let id2 = normalizer
            .normalize_call_stack(&frames2)
            .expect("Failed to normalize call stack");
        assert_eq!(id1, id2);

        // Verify we can retrieve the frames
        let retrieved_frames = normalizer
            .get_call_stack(id1)
            .expect("Failed to get call stack");
        assert_eq!(retrieved_frames.len(), 2);
        assert_eq!(retrieved_frames[0].function_name, "main");
        assert_eq!(retrieved_frames[1].function_name, "foo");
    }

    #[test]
    fn test_reference_counting() {
        let normalizer = CallStackNormalizer::new(NormalizerConfig::default());

        let frames = vec![create_test_stack_frame("test", 1)];
        let id = normalizer
            .normalize_call_stack(&frames)
            .expect("Failed to normalize call stack");

        // Increment reference count
        normalizer
            .increment_ref_count(id)
            .expect("Failed to increment ref count");

        // Decrement reference count
        normalizer
            .decrement_ref_count(id)
            .expect("Failed to decrement ref count");

        // Should still be able to access
        let retrieved = normalizer
            .get_call_stack(id)
            .expect("Failed to get call stack");
        assert_eq!(retrieved.len(), 1);
    }

    #[test]
    fn test_call_stack_ref() {
        // Initialize the global normalizer for this test
        let config = NormalizerConfig::default();
        let _global_normalizer = initialize_global_call_stack_normalizer(config);

        let frames = vec![
            create_test_stack_frame("main", 10),
            create_test_stack_frame("test", 20),
        ];

        // Use the global normalizer to normalize the call stack
        let global_normalizer = get_global_call_stack_normalizer();
        let id = global_normalizer
            .normalize_call_stack(&frames)
            .expect("Failed to normalize call stack");
        let stack_ref = CallStackRef::new(id, Some(2));

        assert_eq!(stack_ref.get_depth().expect("Failed to get depth"), 2);

        let retrieved_frames = stack_ref.get_frames().expect("Failed to get frames");
        assert_eq!(retrieved_frames.len(), 2);
        assert_eq!(retrieved_frames[0].function_name, "main");
    }

    #[test]
    fn test_registry_cleanup() {
        let config = NormalizerConfig {
            max_registry_size: 2,
            min_ref_count_for_cleanup: 2,
            ..Default::default()
        };

        let normalizer = CallStackNormalizer::new(config);

        // Create multiple call stacks
        let frames1 = vec![create_test_stack_frame("func1", 1)];
        let frames2 = vec![create_test_stack_frame("func2", 2)];
        let frames3 = vec![create_test_stack_frame("func3", 3)];

        let id1 = normalizer
            .normalize_call_stack(&frames1)
            .expect("Failed to normalize call stack");
        let id2 = normalizer
            .normalize_call_stack(&frames2)
            .expect("Failed to normalize call stack");

        // Increment ref count for id1 to keep it during cleanup
        normalizer
            .increment_ref_count(id1)
            .expect("Failed to increment ref count");

        // This should trigger cleanup (registry is at max_registry_size)
        let _id3 = normalizer
            .normalize_call_stack(&frames3)
            .expect("Failed to normalize call stack");

        // id1 should still exist (ref count meets the cleanup threshold)
        assert!(normalizer.get_call_stack(id1).is_ok());

        // id2 should have been cleaned up (ref count below the threshold)
        assert!(normalizer.get_call_stack(id2).is_err());

        let stats = normalizer.get_stats();
        assert!(stats.cleanup_operations > 0);
    }

    #[test]
    fn test_statistics_tracking() {
        let normalizer = CallStackNormalizer::new(NormalizerConfig::default());

        let frames1 = vec![create_test_stack_frame("func1", 1)];
        let frames2 = vec![create_test_stack_frame("func1", 1)]; // Duplicate
        let frames3 = vec![create_test_stack_frame("func2", 2)]; // New

        normalizer
            .normalize_call_stack(&frames1)
            .expect("Failed to normalize call stack");
        normalizer
            .normalize_call_stack(&frames2)
            .expect("Failed to normalize call stack"); // Should be a duplicate
        normalizer
            .normalize_call_stack(&frames3)
            .expect("Failed to normalize call stack");

        let stats = normalizer.get_stats();
        assert_eq!(stats.total_processed, 3);
        assert_eq!(stats.unique_stacks, 2);
        assert_eq!(stats.duplicates_avoided, 1);
        assert!(stats.memory_saved_bytes > 0);
    }
}