memscope_rs/cli/commands/html_from_json/data_normalizer.rs

1//! Data normalization module for HTML export
2//!
3//! This module provides functionality to normalize and standardize data from
4//! different JSON sources into a unified format for HTML visualization.
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::collections::HashMap;
9use std::error::Error;
10use std::fmt;
11
/// Unified data structure for memory analysis.
///
/// This is the single normalized view produced by [`DataNormalizer::normalize`]
/// from the individual JSON sources (memory analysis, performance, lifetime,
/// security violations, complex types, variable relationships).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedMemoryData {
    /// Core memory statistics
    pub stats: MemoryStatistics,

    /// Memory allocation details
    pub allocations: Vec<AllocationInfo>,

    /// Performance analysis data
    pub performance: PerformanceMetrics,

    /// Lifecycle analysis
    pub lifecycle: LifecycleAnalysis,

    /// Security analysis
    pub security: SecurityAnalysis,

    /// Complex type analysis
    pub complex_types: ComplexTypeAnalysis,

    /// Variable relationships
    pub variable_relationships: VariableRelationships,

    /// Analysis metadata
    pub metadata: AnalysisMetadata,

    /// Original multi-source data, retained verbatim for advanced features.
    /// Serialized under the `_multiSource` key.
    #[serde(rename = "_multiSource")]
    pub multi_source: HashMap<String, Value>,
}
43
/// Core memory statistics.
///
/// During normalization every field defaults to 0 (or 0.0) when the
/// corresponding value is missing from all JSON sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryStatistics {
    /// Active memory in bytes
    pub active_memory: usize,
    /// Active allocation count (derived from the allocations array)
    pub active_allocations: usize,
    /// Peak memory usage in bytes
    pub peak_memory: usize,
    /// Total allocations made
    pub total_allocations: usize,
    /// Total memory allocated in bytes
    pub total_allocated: usize,
    /// Memory efficiency percentage (0.0 when unavailable)
    pub memory_efficiency: f64,
}
60
/// Information about a single tracked allocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationInfo {
    /// Memory pointer as hex string (falls back to the array index, e.g. "0x2a")
    pub ptr: String,
    /// Allocation size in bytes (0 when missing from the source)
    pub size: usize,
    /// Variable name if available
    pub var_name: Option<String>,
    /// Type name if available
    pub type_name: Option<String>,
    /// Scope name (read from source key "scope_name", falling back to "scope")
    pub scope_name: Option<String>,
    /// Allocation timestamp (0 when missing from the source)
    pub timestamp_alloc: u64,
    /// Deallocation timestamp; `None` means the allocation is still active
    pub timestamp_dealloc: Option<u64>,
    /// Stack trace frames, if recorded
    pub stack_trace: Option<Vec<String>>,
    /// Whether allocation is flagged as leaked (defaults to false)
    pub is_leaked: bool,
    /// Lifetime in milliseconds, when known
    pub lifetime_ms: Option<u64>,
}
85
/// Performance metrics derived from the "performance" JSON source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Total processing time in milliseconds
    pub processing_time_ms: u64,
    /// Allocations processed per second
    pub allocations_per_second: f64,
    /// Memory efficiency percentage
    pub memory_efficiency: f64,
    /// Optimization status flags and tuning parameters
    pub optimization_status: OptimizationStatus,
    /// Allocation distribution by size bucket
    pub allocation_distribution: AllocationDistribution,
}
100
/// Optimization flags and tuning parameters reported by the exporter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationStatus {
    /// Parallel processing enabled (defaults to false when unreported)
    pub parallel_processing: bool,
    /// Schema validation enabled (defaults to false when unreported)
    pub schema_validation: bool,
    /// Streaming enabled (defaults to false when unreported)
    pub streaming_enabled: bool,
    /// Batch size used, when reported
    pub batch_size: Option<usize>,
    /// Buffer size in KB, when reported
    pub buffer_size_kb: Option<usize>,
}
115
/// Histogram of allocation counts bucketed by size.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationDistribution {
    /// Tiny allocations (< 64 bytes)
    pub tiny: usize,
    /// Small allocations (64-1024 bytes)
    pub small: usize,
    /// Medium allocations (1KB-64KB)
    pub medium: usize,
    /// Large allocations (64KB-1MB)
    pub large: usize,
    /// Massive allocations (> 1MB)
    pub massive: usize,
}
130
/// Lifecycle analysis data, sourced from the "lifetime" JSON source.
///
/// Values are kept as raw `serde_json::Value`s; only the containers are
/// normalized (missing fields become empty collections).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LifecycleAnalysis {
    /// Lifecycle events
    pub lifecycle_events: Vec<Value>,
    /// Scope analysis, keyed by scope identifier
    pub scope_analysis: HashMap<String, Value>,
    /// Variable lifetimes, keyed by variable identifier
    pub variable_lifetimes: HashMap<String, Value>,
}
141
/// Security analysis data, sourced from the "security_violations" JSON source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecurityAnalysis {
    /// Total violations count
    pub total_violations: usize,
    /// Risk level ("Unknown" when the source does not report one)
    pub risk_level: String,
    /// Violation counts broken down by severity
    pub severity_breakdown: SeverityBreakdown,
    /// Raw violation reports
    pub violation_reports: Vec<Value>,
    /// Analysis recommendations (non-string entries are dropped)
    pub recommendations: Vec<String>,
}
156
/// Violation counts broken down by severity level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeverityBreakdown {
    /// Critical violations
    pub critical: usize,
    /// High severity violations
    pub high: usize,
    /// Medium severity violations
    pub medium: usize,
    /// Low severity violations
    pub low: usize,
    /// Info level violations
    pub info: usize,
}
171
/// Complex type analysis, sourced from the "complex_types" JSON source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexTypeAnalysis {
    /// Types grouped by category
    pub categorized_types: CategorizedTypes,
    /// Raw per-type analysis details
    pub complex_type_analysis: Vec<Value>,
    /// Aggregate summary information
    pub summary: ComplexTypeSummary,
}
182
/// Types grouped by category; each entry is kept as a raw JSON value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorizedTypes {
    /// Collection types
    pub collections: Vec<Value>,
    /// Generic types
    pub generic_types: Vec<Value>,
    /// Smart pointers
    pub smart_pointers: Vec<Value>,
    /// Trait objects
    pub trait_objects: Vec<Value>,
}
195
/// Aggregate summary of the complex type analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexTypeSummary {
    /// Total number of complex types found
    pub total_complex_types: usize,
    /// Type counts bucketed by complexity level
    pub complexity_distribution: ComplexityDistribution,
}
204
/// Type counts bucketed by complexity level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityDistribution {
    /// Low complexity types
    pub low_complexity: usize,
    /// Medium complexity types
    pub medium_complexity: usize,
    /// High complexity types
    pub high_complexity: usize,
    /// Very high complexity types
    pub very_high_complexity: usize,
}
217
/// Variable relationship data, sourced from the "variable_relationships"
/// JSON source. All payloads are kept as raw JSON values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableRelationships {
    /// Relationship records
    pub relationships: Vec<Value>,
    /// Variable registry, keyed by variable identifier
    pub registry: HashMap<String, Value>,
    /// Dependency graph, keyed by node identifier
    pub dependency_graph: HashMap<String, Value>,
    /// Scope hierarchy, keyed by scope identifier
    pub scope_hierarchy: HashMap<String, Value>,
}
230
/// Metadata describing the analysis run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisMetadata {
    /// Analysis timestamp in Unix seconds (defaults to "now" when absent)
    pub timestamp: u64,
    /// Export format version (defaults to "2.0" when absent)
    pub export_version: String,
    /// Analysis type (defaults to "integrated_analysis" when absent)
    pub analysis_type: String,
    /// Data integrity hash, when provided by the exporter
    pub data_integrity_hash: Option<String>,
}
243
/// Errors that can occur during data normalization.
#[derive(Debug)]
pub enum NormalizationError {
    /// Missing required field (carries the field name)
    MissingField(String),
    /// Invalid data type (carries a description of the mismatch)
    InvalidType(String),
    /// Data validation error (carries the failed consistency check)
    ValidationError(String),
    /// JSON parsing error (wraps the underlying `serde_json` error)
    JsonError(serde_json::Error),
}
256
257impl fmt::Display for NormalizationError {
258    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
259        match self {
260            NormalizationError::MissingField(field) => {
261                write!(f, "Missing required field: {}", field)
262            }
263            NormalizationError::InvalidType(msg) => {
264                write!(f, "Invalid data type: {}", msg)
265            }
266            NormalizationError::ValidationError(msg) => {
267                write!(f, "Data validation error: {}", msg)
268            }
269            NormalizationError::JsonError(err) => {
270                write!(f, "JSON error: {}", err)
271            }
272        }
273    }
274}
275
// Marker impl: `NormalizationError` supplies its message via `Display`,
// so the default `Error` methods suffice.
impl Error for NormalizationError {}

/// Enable `?` on `serde_json` results inside normalization code.
impl From<serde_json::Error> for NormalizationError {
    fn from(err: serde_json::Error) -> Self {
        NormalizationError::JsonError(err)
    }
}
283
/// Data normalizer for converting multi-source JSON to a unified format.
///
/// Construct with [`DataNormalizer::new`] (validation enabled) or
/// [`DataNormalizer::without_validation`].
pub struct DataNormalizer {
    /// When true, `normalize` runs consistency checks on the result.
    validation_enabled: bool,
    /// Default values for missing fields.
    /// NOTE(review): populated but never read in this file — kept for
    /// future use (hence the `#[allow(dead_code)]`).
    #[allow(dead_code)]
    default_values: HashMap<String, Value>,
}
292
293impl DataNormalizer {
294    /// Create a new data normalizer
295    pub fn new() -> Self {
296        Self {
297            validation_enabled: true,
298            default_values: Self::create_default_values(),
299        }
300    }
301
302    /// Create normalizer with validation disabled
303    pub fn without_validation() -> Self {
304        Self {
305            validation_enabled: false,
306            default_values: Self::create_default_values(),
307        }
308    }
309
310    /// Normalize multi-source JSON data to unified format
311    pub fn normalize(
312        &self,
313        multi_source: &HashMap<String, Value>,
314    ) -> Result<UnifiedMemoryData, NormalizationError> {
315        println!("🔄 Starting data normalization...");
316
317        // Extract and normalize each data source
318        let stats = self.normalize_memory_stats(multi_source)?;
319        let allocations = self.normalize_allocations(multi_source)?;
320        let performance = self.normalize_performance(multi_source)?;
321        let lifecycle = self.normalize_lifecycle(multi_source)?;
322        let security = self.normalize_security(multi_source)?;
323        let complex_types = self.normalize_complex_types(multi_source)?;
324        let variable_relationships = self.normalize_variable_relationships(multi_source)?;
325        let metadata = self.normalize_metadata(multi_source)?;
326
327        let unified = UnifiedMemoryData {
328            stats,
329            allocations,
330            performance,
331            lifecycle,
332            security,
333            complex_types,
334            variable_relationships,
335            metadata,
336            multi_source: multi_source.clone(),
337        };
338
339        // Validate the unified data if validation is enabled
340        if self.validation_enabled {
341            self.validate_unified_data(&unified)?;
342        }
343
344        println!("✅ Data normalization completed successfully");
345        Ok(unified)
346    }
347
348    /// Create default values for missing fields
349    fn create_default_values() -> HashMap<String, Value> {
350        let mut defaults = HashMap::new();
351        defaults.insert("active_memory".to_string(), Value::Number(0.into()));
352        defaults.insert("active_allocations".to_string(), Value::Number(0.into()));
353        defaults.insert("peak_memory".to_string(), Value::Number(0.into()));
354        defaults.insert("total_allocations".to_string(), Value::Number(0.into()));
355        defaults.insert("total_allocated".to_string(), Value::Number(0.into()));
356        defaults.insert(
357            "memory_efficiency".to_string(),
358            Value::Number(serde_json::Number::from_f64(0.0).unwrap()),
359        );
360        defaults
361    }
362
    /// Normalize memory statistics.
    ///
    /// Values are sourced from `memory_analysis` first and fall back to the
    /// `performance` source (and, for totals, to the metadata block),
    /// defaulting to 0 / 0.0 when absent everywhere.
    fn normalize_memory_stats(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<MemoryStatistics, NormalizationError> {
        // Try to get stats from memory_analysis first, then performance
        let memory_data = multi_source.get("memory_analysis");
        let performance_data = multi_source.get("performance");

        // The stats object may live under either "memory_stats" or "stats".
        let memory_stats = memory_data
            .and_then(|data| data.get("memory_stats"))
            .or_else(|| memory_data.and_then(|data| data.get("stats")));

        let perf_memory = performance_data.and_then(|data| data.get("memory_performance"));

        let metadata = memory_data.and_then(|data| data.get("metadata"));

        Ok(MemoryStatistics {
            active_memory: self
                .extract_usize(memory_stats, "active_memory")
                .or_else(|| self.extract_usize(perf_memory, "active_memory"))
                .unwrap_or(0),
            // Derived from the allocations array rather than a stats field.
            active_allocations: self.count_active_allocations(memory_data),
            peak_memory: self
                .extract_usize(memory_stats, "peak_memory")
                .or_else(|| self.extract_usize(perf_memory, "peak_memory"))
                .unwrap_or(0),
            // NOTE(review): the performance fallback reads "total_allocated"
            // (not "total_allocations") — presumably matching the performance
            // JSON schema; confirm against the exporter.
            total_allocations: self
                .extract_usize(memory_stats, "total_allocations")
                .or_else(|| self.extract_usize(perf_memory, "total_allocated"))
                .or_else(|| self.extract_usize(metadata, "total_allocations"))
                .unwrap_or(0),
            total_allocated: self
                .extract_usize(memory_stats, "total_allocated")
                .or_else(|| self.extract_usize(perf_memory, "total_allocated"))
                .unwrap_or(0),
            memory_efficiency: self
                .extract_f64(perf_memory, "memory_efficiency")
                .unwrap_or(0.0),
        })
    }
404
405    /// Count active allocations from allocation array
406    fn count_active_allocations(&self, memory_data: Option<&Value>) -> usize {
407        memory_data
408            .and_then(|data| data.get("allocations"))
409            .and_then(|allocs| allocs.as_array())
410            .map(|arr| arr.len())
411            .unwrap_or(0)
412    }
413
414    /// Normalize allocations data
415    fn normalize_allocations(
416        &self,
417        multi_source: &HashMap<String, Value>,
418    ) -> Result<Vec<AllocationInfo>, NormalizationError> {
419        let memory_data = multi_source.get("memory_analysis");
420        let empty_vec = vec![];
421        let allocations_array = memory_data
422            .and_then(|data| data.get("allocations"))
423            .and_then(|allocs| allocs.as_array())
424            .unwrap_or(&empty_vec);
425
426        let mut normalized_allocations = Vec::new();
427
428        for (index, alloc) in allocations_array.iter().enumerate() {
429            if let Some(_alloc_obj) = alloc.as_object() {
430                let allocation_info = AllocationInfo {
431                    ptr: self
432                        .extract_string(Some(alloc), "ptr")
433                        .unwrap_or_else(|| format!("0x{:x}", index)),
434                    size: self.extract_usize(Some(alloc), "size").unwrap_or(0),
435                    var_name: self.extract_string(Some(alloc), "var_name"),
436                    type_name: self.extract_string(Some(alloc), "type_name"),
437                    scope_name: self
438                        .extract_string(Some(alloc), "scope_name")
439                        .or_else(|| self.extract_string(Some(alloc), "scope")),
440                    timestamp_alloc: self
441                        .extract_u64(Some(alloc), "timestamp_alloc")
442                        .or_else(|| self.extract_u64(Some(alloc), "timestamp"))
443                        .unwrap_or(0),
444                    timestamp_dealloc: self.extract_u64(Some(alloc), "timestamp_dealloc"),
445                    stack_trace: self.extract_string_array(Some(alloc), "stack_trace"),
446                    is_leaked: self.extract_bool(Some(alloc), "is_leaked").unwrap_or(false),
447                    lifetime_ms: self.extract_u64(Some(alloc), "lifetime_ms"),
448                };
449                normalized_allocations.push(allocation_info);
450            }
451        }
452
453        println!("📊 Normalized {} allocations", normalized_allocations.len());
454        Ok(normalized_allocations)
455    }
456
457    /// Normalize performance data
458    fn normalize_performance(
459        &self,
460        multi_source: &HashMap<String, Value>,
461    ) -> Result<PerformanceMetrics, NormalizationError> {
462        let performance_data = multi_source.get("performance");
463        let export_perf = performance_data.and_then(|data| data.get("export_performance"));
464        let memory_perf = performance_data.and_then(|data| data.get("memory_performance"));
465        let alloc_dist = performance_data.and_then(|data| data.get("allocation_distribution"));
466        let opt_status = performance_data.and_then(|data| data.get("optimization_status"));
467
468        Ok(PerformanceMetrics {
469            processing_time_ms: self
470                .extract_u64(export_perf, "total_processing_time_ms")
471                .unwrap_or(0),
472            allocations_per_second: export_perf
473                .and_then(|data| data.get("processing_rate"))
474                .and_then(|rate| self.extract_f64(Some(rate), "allocations_per_second"))
475                .unwrap_or(0.0),
476            memory_efficiency: self
477                .extract_f64(memory_perf, "memory_efficiency")
478                .unwrap_or(0.0),
479            optimization_status: OptimizationStatus {
480                parallel_processing: self
481                    .extract_bool(opt_status, "parallel_processing")
482                    .unwrap_or(false),
483                schema_validation: self
484                    .extract_bool(opt_status, "schema_validation")
485                    .unwrap_or(false),
486                streaming_enabled: self
487                    .extract_bool(opt_status, "streaming_enabled")
488                    .unwrap_or(false),
489                batch_size: self.extract_usize(opt_status, "batch_size"),
490                buffer_size_kb: self.extract_usize(opt_status, "buffer_size_kb"),
491            },
492            allocation_distribution: AllocationDistribution {
493                tiny: self.extract_usize(alloc_dist, "tiny").unwrap_or(0),
494                small: self.extract_usize(alloc_dist, "small").unwrap_or(0),
495                medium: self.extract_usize(alloc_dist, "medium").unwrap_or(0),
496                large: self.extract_usize(alloc_dist, "large").unwrap_or(0),
497                massive: self.extract_usize(alloc_dist, "massive").unwrap_or(0),
498            },
499        })
500    }
501
502    /// Normalize lifecycle data
503    fn normalize_lifecycle(
504        &self,
505        multi_source: &HashMap<String, Value>,
506    ) -> Result<LifecycleAnalysis, NormalizationError> {
507        let empty_object = Value::Object(serde_json::Map::new());
508        let lifecycle_data = multi_source.get("lifetime").unwrap_or(&empty_object);
509
510        Ok(LifecycleAnalysis {
511            lifecycle_events: lifecycle_data
512                .get("lifecycle_events")
513                .and_then(|events| events.as_array())
514                .cloned()
515                .unwrap_or_default(),
516            scope_analysis: lifecycle_data
517                .get("scope_analysis")
518                .and_then(|scope| scope.as_object())
519                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
520                .unwrap_or_default(),
521            variable_lifetimes: lifecycle_data
522                .get("variable_lifetimes")
523                .and_then(|lifetimes| lifetimes.as_object())
524                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
525                .unwrap_or_default(),
526        })
527    }
528
529    /// Normalize security data
530    fn normalize_security(
531        &self,
532        multi_source: &HashMap<String, Value>,
533    ) -> Result<SecurityAnalysis, NormalizationError> {
534        let security_data = multi_source.get("security_violations");
535        let security_summary = security_data
536            .and_then(|data| data.get("security_summary"))
537            .and_then(|summary| summary.get("security_analysis_summary"));
538        let severity = security_summary.and_then(|summary| summary.get("severity_breakdown"));
539
540        Ok(SecurityAnalysis {
541            total_violations: self
542                .extract_usize(security_summary, "total_violations")
543                .unwrap_or(0),
544            risk_level: security_summary
545                .and_then(|summary| summary.get("risk_assessment"))
546                .and_then(|risk| self.extract_string(Some(risk), "risk_level"))
547                .unwrap_or_else(|| "Unknown".to_string()),
548            severity_breakdown: SeverityBreakdown {
549                critical: self.extract_usize(severity, "critical").unwrap_or(0),
550                high: self.extract_usize(severity, "high").unwrap_or(0),
551                medium: self.extract_usize(severity, "medium").unwrap_or(0),
552                low: self.extract_usize(severity, "low").unwrap_or(0),
553                info: self.extract_usize(severity, "info").unwrap_or(0),
554            },
555            violation_reports: security_data
556                .and_then(|data| data.get("violation_reports"))
557                .and_then(|reports| reports.as_array())
558                .cloned()
559                .unwrap_or_default(),
560            recommendations: security_data
561                .and_then(|data| data.get("analysis_recommendations"))
562                .and_then(|recs| recs.as_array())
563                .map(|arr| {
564                    arr.iter()
565                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
566                        .collect()
567                })
568                .unwrap_or_default(),
569        })
570    }
571
572    /// Normalize complex types data
573    fn normalize_complex_types(
574        &self,
575        multi_source: &HashMap<String, Value>,
576    ) -> Result<ComplexTypeAnalysis, NormalizationError> {
577        let empty_object = Value::Object(serde_json::Map::new());
578        let complex_data = multi_source.get("complex_types").unwrap_or(&empty_object);
579        let categorized = complex_data.get("categorized_types");
580        let summary = complex_data.get("summary");
581        let complexity_dist = summary.and_then(|s| s.get("complexity_distribution"));
582
583        Ok(ComplexTypeAnalysis {
584            categorized_types: CategorizedTypes {
585                collections: categorized
586                    .and_then(|cat| cat.get("collections"))
587                    .and_then(|coll| coll.as_array())
588                    .cloned()
589                    .unwrap_or_default(),
590                generic_types: categorized
591                    .and_then(|cat| cat.get("generic_types"))
592                    .and_then(|gen| gen.as_array())
593                    .cloned()
594                    .unwrap_or_default(),
595                smart_pointers: categorized
596                    .and_then(|cat| cat.get("smart_pointers"))
597                    .and_then(|smart| smart.as_array())
598                    .cloned()
599                    .unwrap_or_default(),
600                trait_objects: categorized
601                    .and_then(|cat| cat.get("trait_objects"))
602                    .and_then(|traits| traits.as_array())
603                    .cloned()
604                    .unwrap_or_default(),
605            },
606            complex_type_analysis: complex_data
607                .get("complex_type_analysis")
608                .and_then(|analysis| analysis.as_array())
609                .cloned()
610                .unwrap_or_default(),
611            summary: ComplexTypeSummary {
612                total_complex_types: self
613                    .extract_usize(summary, "total_complex_types")
614                    .unwrap_or(0),
615                complexity_distribution: ComplexityDistribution {
616                    low_complexity: self
617                        .extract_usize(complexity_dist, "low_complexity")
618                        .unwrap_or(0),
619                    medium_complexity: self
620                        .extract_usize(complexity_dist, "medium_complexity")
621                        .unwrap_or(0),
622                    high_complexity: self
623                        .extract_usize(complexity_dist, "high_complexity")
624                        .unwrap_or(0),
625                    very_high_complexity: self
626                        .extract_usize(complexity_dist, "very_high_complexity")
627                        .unwrap_or(0),
628                },
629            },
630        })
631    }
632
633    /// Normalize variable relationships data
634    fn normalize_variable_relationships(
635        &self,
636        multi_source: &HashMap<String, Value>,
637    ) -> Result<VariableRelationships, NormalizationError> {
638        let empty_object = Value::Object(serde_json::Map::new());
639        let var_data = multi_source
640            .get("variable_relationships")
641            .unwrap_or(&empty_object);
642
643        Ok(VariableRelationships {
644            relationships: var_data
645                .get("variable_relationships")
646                .and_then(|rels| rels.as_array())
647                .cloned()
648                .unwrap_or_default(),
649            registry: var_data
650                .get("variable_registry")
651                .and_then(|reg| reg.as_object())
652                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
653                .unwrap_or_default(),
654            dependency_graph: var_data
655                .get("dependency_graph")
656                .and_then(|graph| graph.as_object())
657                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
658                .unwrap_or_default(),
659            scope_hierarchy: var_data
660                .get("scope_hierarchy")
661                .and_then(|hierarchy| hierarchy.as_object())
662                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
663                .unwrap_or_default(),
664        })
665    }
666
667    /// Normalize metadata
668    fn normalize_metadata(
669        &self,
670        multi_source: &HashMap<String, Value>,
671    ) -> Result<AnalysisMetadata, NormalizationError> {
672        let memory_data = multi_source.get("memory_analysis");
673        let metadata = memory_data.and_then(|data| data.get("metadata"));
674
675        Ok(AnalysisMetadata {
676            timestamp: self.extract_u64(metadata, "timestamp").unwrap_or_else(|| {
677                std::time::SystemTime::now()
678                    .duration_since(std::time::UNIX_EPOCH)
679                    .unwrap_or_default()
680                    .as_secs()
681            }),
682            export_version: self
683                .extract_string(metadata, "export_version")
684                .unwrap_or_else(|| "2.0".to_string()),
685            analysis_type: self
686                .extract_string(metadata, "analysis_type")
687                .unwrap_or_else(|| "integrated_analysis".to_string()),
688            data_integrity_hash: self.extract_string(metadata, "data_integrity_hash"),
689        })
690    }
691
    /// Validate unified data structure.
    ///
    /// Hard inconsistencies (active memory above peak, active allocations
    /// above total) return a `ValidationError`; a mismatch between the
    /// `active_allocations` statistic and the allocation list only prints a
    /// warning.
    fn validate_unified_data(&self, data: &UnifiedMemoryData) -> Result<(), NormalizationError> {
        // Basic validation checks. The `> 0` guards skip a check when the
        // corresponding statistic is missing (it was defaulted to 0).
        if data.stats.active_memory > data.stats.peak_memory && data.stats.peak_memory > 0 {
            return Err(NormalizationError::ValidationError(
                "Active memory cannot exceed peak memory".to_string(),
            ));
        }

        if data.stats.active_allocations > data.stats.total_allocations
            && data.stats.total_allocations > 0
        {
            return Err(NormalizationError::ValidationError(
                "Active allocations cannot exceed total allocations".to_string(),
            ));
        }

        // Validate allocation data consistency: an allocation with no
        // dealloc timestamp is considered still active.
        let actual_active_count = data
            .allocations
            .iter()
            .filter(|alloc| alloc.timestamp_dealloc.is_none())
            .count();

        if actual_active_count != data.stats.active_allocations && data.stats.active_allocations > 0
        {
            println!(
                "⚠️  Warning: Active allocation count mismatch (stats: {}, actual: {})",
                data.stats.active_allocations, actual_active_count
            );
        }

        println!("✅ Data validation passed");
        Ok(())
    }
727
728    // Helper methods for data extraction
729
730    fn extract_usize(&self, data: Option<&Value>, field: &str) -> Option<usize> {
731        data?.get(field)?.as_u64().map(|v| v as usize)
732    }
733
734    fn extract_u64(&self, data: Option<&Value>, field: &str) -> Option<u64> {
735        data?.get(field)?.as_u64()
736    }
737
738    fn extract_f64(&self, data: Option<&Value>, field: &str) -> Option<f64> {
739        data?.get(field)?.as_f64()
740    }
741
742    fn extract_bool(&self, data: Option<&Value>, field: &str) -> Option<bool> {
743        data?.get(field)?.as_bool()
744    }
745
746    fn extract_string(&self, data: Option<&Value>, field: &str) -> Option<String> {
747        data?.get(field)?.as_str().map(|s| s.to_string())
748    }
749
750    fn extract_string_array(&self, data: Option<&Value>, field: &str) -> Option<Vec<String>> {
751        data?
752            .get(field)?
753            .as_array()?
754            .iter()
755            .map(|v| v.as_str().map(|s| s.to_string()))
756            .collect()
757    }
758}
759
impl Default for DataNormalizer {
    /// Equivalent to [`DataNormalizer::new`]: validation enabled.
    fn default() -> Self {
        Self::new()
    }
}