1use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::collections::HashMap;
9use std::error::Error;
10use std::fmt;
11
/// Unified, normalized view of every memory-analysis data source,
/// produced by [`DataNormalizer::normalize`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnifiedMemoryData {
    /// Aggregate memory statistics (active/peak/total counters).
    pub stats: MemoryStatistics,

    /// Per-allocation records extracted from the `memory_analysis` source.
    pub allocations: Vec<AllocationInfo>,

    /// Export/processing performance metrics.
    pub performance: PerformanceMetrics,

    /// Lifecycle events, scope analysis, and variable lifetimes.
    pub lifecycle: LifecycleAnalysis,

    /// Security violation summary and reports.
    pub security: SecurityAnalysis,

    /// Categorized complex-type information and complexity summary.
    pub complex_types: ComplexTypeAnalysis,

    /// Variable relationship graph, registry, and scope hierarchy.
    pub variable_relationships: VariableRelationships,

    /// Metadata about the analysis run (timestamp, versions, hash).
    pub metadata: AnalysisMetadata,

    /// Raw, untouched copy of all input sources, kept for downstream
    /// consumers; serialized under the `_multiSource` key.
    #[serde(rename = "_multiSource")]
    pub multi_source: HashMap<String, Value>,
}
43
/// Aggregate memory counters, merged from the `memory_analysis` and
/// `performance` sources (see `DataNormalizer::normalize_memory_stats`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryStatistics {
    /// Currently active (not yet deallocated) memory, in bytes.
    pub active_memory: usize,
    /// Number of active allocation records.
    pub active_allocations: usize,
    /// Peak memory usage observed, in bytes.
    pub peak_memory: usize,
    /// Total number of allocations performed.
    pub total_allocations: usize,
    /// Total bytes allocated over the run.
    pub total_allocated: usize,
    /// Memory efficiency ratio reported by the performance source.
    pub memory_efficiency: f64,
}
60
/// One normalized allocation record from the `memory_analysis` source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationInfo {
    /// Pointer address as a string (synthesized from the record index
    /// when the source omits it).
    pub ptr: String,
    /// Allocation size in bytes.
    pub size: usize,
    /// Variable name, when the tracker captured one.
    pub var_name: Option<String>,
    /// Type name of the allocated value, when available.
    pub type_name: Option<String>,
    /// Enclosing scope name (sourced from `scope_name` or `scope`).
    pub scope_name: Option<String>,
    /// Allocation timestamp (units as reported by the source).
    pub timestamp_alloc: u64,
    /// Deallocation timestamp; `None` while the allocation is live.
    pub timestamp_dealloc: Option<u64>,
    /// Captured stack trace frames, if recorded.
    pub stack_trace: Option<Vec<String>>,
    /// Whether the source flagged this allocation as leaked.
    pub is_leaked: bool,
    /// Lifetime in milliseconds, if the source computed one.
    pub lifetime_ms: Option<u64>,
}
85
/// Export/processing performance data drawn from the `performance` source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Total processing time in milliseconds.
    pub processing_time_ms: u64,
    /// Processing rate (allocations handled per second).
    pub allocations_per_second: f64,
    /// Memory efficiency ratio from `memory_performance`.
    pub memory_efficiency: f64,
    /// Which optimizations were enabled for the export.
    pub optimization_status: OptimizationStatus,
    /// Size-bucketed allocation counts.
    pub allocation_distribution: AllocationDistribution,
}
100
/// Flags and tunables describing which export optimizations were active.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationStatus {
    /// Whether parallel processing was enabled.
    pub parallel_processing: bool,
    /// Whether schema validation was enabled.
    pub schema_validation: bool,
    /// Whether streaming output was enabled.
    pub streaming_enabled: bool,
    /// Batch size used, when batching was configured.
    pub batch_size: Option<usize>,
    /// I/O buffer size in KiB, when configured.
    pub buffer_size_kb: Option<usize>,
}
115
/// Allocation counts bucketed by size class; bucket boundaries are
/// defined by the upstream `performance` source, not here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AllocationDistribution {
    pub tiny: usize,
    pub small: usize,
    pub medium: usize,
    pub large: usize,
    pub massive: usize,
}
130
/// Lifecycle data from the `lifetime` source, kept as loosely typed JSON
/// since its internal schema is not normalized further.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LifecycleAnalysis {
    /// Raw lifecycle event records.
    pub lifecycle_events: Vec<Value>,
    /// Per-scope analysis data, keyed by scope identifier.
    pub scope_analysis: HashMap<String, Value>,
    /// Per-variable lifetime data, keyed by variable identifier.
    pub variable_lifetimes: HashMap<String, Value>,
}
141
/// Security findings summarized from the `security_violations` source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecurityAnalysis {
    /// Total number of reported violations.
    pub total_violations: usize,
    /// Overall risk level string; defaults to "Unknown" when absent.
    pub risk_level: String,
    /// Violation counts broken down by severity.
    pub severity_breakdown: SeverityBreakdown,
    /// Raw per-violation report records.
    pub violation_reports: Vec<Value>,
    /// Human-readable remediation recommendations.
    pub recommendations: Vec<String>,
}
156
/// Violation counts per severity level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeverityBreakdown {
    pub critical: usize,
    pub high: usize,
    pub medium: usize,
    pub low: usize,
    pub info: usize,
}
171
/// Complex-type information from the `complex_types` source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexTypeAnalysis {
    /// Types grouped by category (collections, generics, ...).
    pub categorized_types: CategorizedTypes,
    /// Raw per-type analysis records.
    pub complex_type_analysis: Vec<Value>,
    /// Aggregate counts and complexity distribution.
    pub summary: ComplexTypeSummary,
}
182
/// Complex types bucketed by category; entries are raw JSON records.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CategorizedTypes {
    pub collections: Vec<Value>,
    pub generic_types: Vec<Value>,
    pub smart_pointers: Vec<Value>,
    pub trait_objects: Vec<Value>,
}
195
/// Aggregate complex-type counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexTypeSummary {
    /// Total number of complex types found.
    pub total_complex_types: usize,
    /// How many types fall into each complexity tier.
    pub complexity_distribution: ComplexityDistribution,
}
204
/// Type counts per complexity tier; tier thresholds are defined by the
/// upstream `complex_types` source.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityDistribution {
    pub low_complexity: usize,
    pub medium_complexity: usize,
    pub high_complexity: usize,
    pub very_high_complexity: usize,
}
217
/// Variable relationship data from the `variable_relationships` source,
/// kept as loosely typed JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariableRelationships {
    /// Raw relationship records.
    pub relationships: Vec<Value>,
    /// Variable registry, keyed by variable identifier.
    pub registry: HashMap<String, Value>,
    /// Dependency graph, keyed by variable identifier.
    pub dependency_graph: HashMap<String, Value>,
    /// Scope hierarchy, keyed by scope identifier.
    pub scope_hierarchy: HashMap<String, Value>,
}
230
/// Metadata about the analysis run, sourced from
/// `memory_analysis.metadata` with fallbacks applied during normalization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisMetadata {
    /// Analysis timestamp (falls back to the current Unix time in seconds).
    pub timestamp: u64,
    /// Export format version; defaults to "2.0".
    pub export_version: String,
    /// Kind of analysis performed; defaults to "integrated_analysis".
    pub analysis_type: String,
    /// Optional integrity hash over the exported data.
    pub data_integrity_hash: Option<String>,
}
243
/// Errors that can occur while normalizing multi-source analysis data.
#[derive(Debug)]
pub enum NormalizationError {
    /// A required field was absent; payload is the field name.
    MissingField(String),
    /// A field had an unexpected JSON type; payload describes the mismatch.
    InvalidType(String),
    /// Cross-field validation failed; payload describes the violation.
    ValidationError(String),
    /// An underlying serde_json (de)serialization error.
    JsonError(serde_json::Error),
}
256
257impl fmt::Display for NormalizationError {
258 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
259 match self {
260 NormalizationError::MissingField(field) => {
261 write!(f, "Missing required field: {}", field)
262 }
263 NormalizationError::InvalidType(msg) => {
264 write!(f, "Invalid data type: {}", msg)
265 }
266 NormalizationError::ValidationError(msg) => {
267 write!(f, "Data validation error: {}", msg)
268 }
269 NormalizationError::JsonError(err) => {
270 write!(f, "JSON error: {}", err)
271 }
272 }
273 }
274}
275
// Marker impl: `Display` + `Debug` above supply everything `Error` needs.
impl Error for NormalizationError {}
277
278impl From<serde_json::Error> for NormalizationError {
279 fn from(err: serde_json::Error) -> Self {
280 NormalizationError::JsonError(err)
281 }
282}
283
/// Normalizes heterogeneous multi-source JSON into [`UnifiedMemoryData`].
pub struct DataNormalizer {
    /// When true, `normalize` runs cross-field validation on the result.
    validation_enabled: bool,
    // Precomputed defaults; currently unused by the normalization paths
    // (which inline their own `unwrap_or` defaults), hence the allow.
    #[allow(dead_code)]
    default_values: HashMap<String, Value>,
}
292
impl DataNormalizer {
    /// Creates a normalizer with validation enabled.
    pub fn new() -> Self {
        Self {
            validation_enabled: true,
            default_values: Self::create_default_values(),
        }
    }

    /// Creates a normalizer that skips the post-normalization validation
    /// pass (useful when inputs are known to be partial or inconsistent).
    pub fn without_validation() -> Self {
        Self {
            validation_enabled: false,
            default_values: Self::create_default_values(),
        }
    }

    /// Normalizes every known source in `multi_source` into a single
    /// [`UnifiedMemoryData`]. Expected top-level keys include
    /// `memory_analysis`, `performance`, `lifetime`, `security_violations`,
    /// `complex_types`, and `variable_relationships`; missing sources fall
    /// back to defaults rather than erroring.
    ///
    /// # Errors
    /// Returns [`NormalizationError::ValidationError`] when validation is
    /// enabled and the normalized statistics are internally inconsistent.
    pub fn normalize(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<UnifiedMemoryData, NormalizationError> {
        println!("🔄 Starting data normalization...");

        let stats = self.normalize_memory_stats(multi_source)?;
        let allocations = self.normalize_allocations(multi_source)?;
        let performance = self.normalize_performance(multi_source)?;
        let lifecycle = self.normalize_lifecycle(multi_source)?;
        let security = self.normalize_security(multi_source)?;
        let complex_types = self.normalize_complex_types(multi_source)?;
        let variable_relationships = self.normalize_variable_relationships(multi_source)?;
        let metadata = self.normalize_metadata(multi_source)?;

        let unified = UnifiedMemoryData {
            stats,
            allocations,
            performance,
            lifecycle,
            security,
            complex_types,
            variable_relationships,
            metadata,
            // Keep the raw inputs alongside the normalized view.
            multi_source: multi_source.clone(),
        };

        if self.validation_enabled {
            self.validate_unified_data(&unified)?;
        }

        println!("✅ Data normalization completed successfully");
        Ok(unified)
    }

    /// Builds the default-value table stored on the normalizer.
    /// NOTE(review): these defaults are not consulted by the normalization
    /// methods below, which hard-code their own fallbacks.
    fn create_default_values() -> HashMap<String, Value> {
        let mut defaults = HashMap::new();
        defaults.insert("active_memory".to_string(), Value::Number(0.into()));
        defaults.insert("active_allocations".to_string(), Value::Number(0.into()));
        defaults.insert("peak_memory".to_string(), Value::Number(0.into()));
        defaults.insert("total_allocations".to_string(), Value::Number(0.into()));
        defaults.insert("total_allocated".to_string(), Value::Number(0.into()));
        defaults.insert(
            "memory_efficiency".to_string(),
            // 0.0 is finite, so from_f64 cannot return None here.
            Value::Number(serde_json::Number::from_f64(0.0).unwrap()),
        );
        defaults
    }

    /// Merges memory counters from `memory_analysis` (primary) and the
    /// `performance` source (fallback). Stats may live under either
    /// `memory_stats` or `stats`; each field falls back through the
    /// alternate locations before defaulting to 0.
    fn normalize_memory_stats(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<MemoryStatistics, NormalizationError> {
        let memory_data = multi_source.get("memory_analysis");
        let performance_data = multi_source.get("performance");

        // Primary stats object: "memory_stats", else legacy "stats".
        let memory_stats = memory_data
            .and_then(|data| data.get("memory_stats"))
            .or_else(|| memory_data.and_then(|data| data.get("stats")));

        let perf_memory = performance_data.and_then(|data| data.get("memory_performance"));

        let metadata = memory_data.and_then(|data| data.get("metadata"));

        Ok(MemoryStatistics {
            active_memory: self
                .extract_usize(memory_stats, "active_memory")
                .or_else(|| self.extract_usize(perf_memory, "active_memory"))
                .unwrap_or(0),
            // Derived from the allocations array length, not a stats field.
            active_allocations: self.count_active_allocations(memory_data),
            peak_memory: self
                .extract_usize(memory_stats, "peak_memory")
                .or_else(|| self.extract_usize(perf_memory, "peak_memory"))
                .unwrap_or(0),
            total_allocations: self
                .extract_usize(memory_stats, "total_allocations")
                // NOTE(review): perf fallback reads "total_allocated" here,
                // not "total_allocations" — confirm this mapping is intended.
                .or_else(|| self.extract_usize(perf_memory, "total_allocated"))
                .or_else(|| self.extract_usize(metadata, "total_allocations"))
                .unwrap_or(0),
            total_allocated: self
                .extract_usize(memory_stats, "total_allocated")
                .or_else(|| self.extract_usize(perf_memory, "total_allocated"))
                .unwrap_or(0),
            memory_efficiency: self
                .extract_f64(perf_memory, "memory_efficiency")
                .unwrap_or(0.0),
        })
    }

    /// Counts entries in the source's `allocations` array. Every entry is
    /// counted as active here; deallocated entries are only distinguished
    /// later, in `validate_unified_data`.
    fn count_active_allocations(&self, memory_data: Option<&Value>) -> usize {
        memory_data
            .and_then(|data| data.get("allocations"))
            .and_then(|allocs| allocs.as_array())
            .map(|arr| arr.len())
            .unwrap_or(0)
    }

    /// Converts each object in `memory_analysis.allocations` into an
    /// [`AllocationInfo`]. Non-object entries are silently skipped; missing
    /// fields get defaults (a `ptr` synthesized from the array index,
    /// size 0, etc.). Legacy field names `scope` and `timestamp` are
    /// accepted as fallbacks for `scope_name` / `timestamp_alloc`.
    fn normalize_allocations(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<Vec<AllocationInfo>, NormalizationError> {
        let memory_data = multi_source.get("memory_analysis");
        // Borrowed fallback so the happy path stays allocation-free.
        let empty_vec = vec![];
        let allocations_array = memory_data
            .and_then(|data| data.get("allocations"))
            .and_then(|allocs| allocs.as_array())
            .unwrap_or(&empty_vec);

        let mut normalized_allocations = Vec::new();

        for (index, alloc) in allocations_array.iter().enumerate() {
            if let Some(_alloc_obj) = alloc.as_object() {
                let allocation_info = AllocationInfo {
                    ptr: self
                        .extract_string(Some(alloc), "ptr")
                        // Synthetic address from the index when absent.
                        .unwrap_or_else(|| format!("0x{:x}", index)),
                    size: self.extract_usize(Some(alloc), "size").unwrap_or(0),
                    var_name: self.extract_string(Some(alloc), "var_name"),
                    type_name: self.extract_string(Some(alloc), "type_name"),
                    scope_name: self
                        .extract_string(Some(alloc), "scope_name")
                        .or_else(|| self.extract_string(Some(alloc), "scope")),
                    timestamp_alloc: self
                        .extract_u64(Some(alloc), "timestamp_alloc")
                        .or_else(|| self.extract_u64(Some(alloc), "timestamp"))
                        .unwrap_or(0),
                    timestamp_dealloc: self.extract_u64(Some(alloc), "timestamp_dealloc"),
                    stack_trace: self.extract_string_array(Some(alloc), "stack_trace"),
                    is_leaked: self.extract_bool(Some(alloc), "is_leaked").unwrap_or(false),
                    lifetime_ms: self.extract_u64(Some(alloc), "lifetime_ms"),
                };
                normalized_allocations.push(allocation_info);
            }
        }

        println!("📊 Normalized {} allocations", normalized_allocations.len());
        Ok(normalized_allocations)
    }

    /// Extracts performance metrics from the `performance` source's
    /// `export_performance`, `memory_performance`, `allocation_distribution`,
    /// and `optimization_status` sub-objects, defaulting missing values.
    fn normalize_performance(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<PerformanceMetrics, NormalizationError> {
        let performance_data = multi_source.get("performance");
        let export_perf = performance_data.and_then(|data| data.get("export_performance"));
        let memory_perf = performance_data.and_then(|data| data.get("memory_performance"));
        let alloc_dist = performance_data.and_then(|data| data.get("allocation_distribution"));
        let opt_status = performance_data.and_then(|data| data.get("optimization_status"));

        Ok(PerformanceMetrics {
            processing_time_ms: self
                .extract_u64(export_perf, "total_processing_time_ms")
                .unwrap_or(0),
            // Nested one level deeper: export_performance.processing_rate.
            allocations_per_second: export_perf
                .and_then(|data| data.get("processing_rate"))
                .and_then(|rate| self.extract_f64(Some(rate), "allocations_per_second"))
                .unwrap_or(0.0),
            memory_efficiency: self
                .extract_f64(memory_perf, "memory_efficiency")
                .unwrap_or(0.0),
            optimization_status: OptimizationStatus {
                parallel_processing: self
                    .extract_bool(opt_status, "parallel_processing")
                    .unwrap_or(false),
                schema_validation: self
                    .extract_bool(opt_status, "schema_validation")
                    .unwrap_or(false),
                streaming_enabled: self
                    .extract_bool(opt_status, "streaming_enabled")
                    .unwrap_or(false),
                batch_size: self.extract_usize(opt_status, "batch_size"),
                buffer_size_kb: self.extract_usize(opt_status, "buffer_size_kb"),
            },
            allocation_distribution: AllocationDistribution {
                tiny: self.extract_usize(alloc_dist, "tiny").unwrap_or(0),
                small: self.extract_usize(alloc_dist, "small").unwrap_or(0),
                medium: self.extract_usize(alloc_dist, "medium").unwrap_or(0),
                large: self.extract_usize(alloc_dist, "large").unwrap_or(0),
                massive: self.extract_usize(alloc_dist, "massive").unwrap_or(0),
            },
        })
    }

    /// Copies lifecycle data out of the `lifetime` source as-is; absent
    /// source or sub-objects yield empty collections.
    fn normalize_lifecycle(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<LifecycleAnalysis, NormalizationError> {
        let empty_object = Value::Object(serde_json::Map::new());
        let lifecycle_data = multi_source.get("lifetime").unwrap_or(&empty_object);

        Ok(LifecycleAnalysis {
            lifecycle_events: lifecycle_data
                .get("lifecycle_events")
                .and_then(|events| events.as_array())
                .cloned()
                .unwrap_or_default(),
            scope_analysis: lifecycle_data
                .get("scope_analysis")
                .and_then(|scope| scope.as_object())
                // serde_json::Map -> HashMap conversion.
                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
                .unwrap_or_default(),
            variable_lifetimes: lifecycle_data
                .get("variable_lifetimes")
                .and_then(|lifetimes| lifetimes.as_object())
                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
                .unwrap_or_default(),
        })
    }

    /// Summarizes the `security_violations` source. The summary lives under
    /// the nested path `security_summary.security_analysis_summary`; missing
    /// data yields zero counts, "Unknown" risk, and empty lists.
    fn normalize_security(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<SecurityAnalysis, NormalizationError> {
        let security_data = multi_source.get("security_violations");
        let security_summary = security_data
            .and_then(|data| data.get("security_summary"))
            .and_then(|summary| summary.get("security_analysis_summary"));
        let severity = security_summary.and_then(|summary| summary.get("severity_breakdown"));

        Ok(SecurityAnalysis {
            total_violations: self
                .extract_usize(security_summary, "total_violations")
                .unwrap_or(0),
            risk_level: security_summary
                .and_then(|summary| summary.get("risk_assessment"))
                .and_then(|risk| self.extract_string(Some(risk), "risk_level"))
                .unwrap_or_else(|| "Unknown".to_string()),
            severity_breakdown: SeverityBreakdown {
                critical: self.extract_usize(severity, "critical").unwrap_or(0),
                high: self.extract_usize(severity, "high").unwrap_or(0),
                medium: self.extract_usize(severity, "medium").unwrap_or(0),
                low: self.extract_usize(severity, "low").unwrap_or(0),
                info: self.extract_usize(severity, "info").unwrap_or(0),
            },
            violation_reports: security_data
                .and_then(|data| data.get("violation_reports"))
                .and_then(|reports| reports.as_array())
                .cloned()
                .unwrap_or_default(),
            recommendations: security_data
                .and_then(|data| data.get("analysis_recommendations"))
                .and_then(|recs| recs.as_array())
                // Keep only string entries; others are dropped silently.
                .map(|arr| {
                    arr.iter()
                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
                        .collect()
                })
                .unwrap_or_default(),
        })
    }

    /// Extracts the `complex_types` source: category arrays, raw analysis
    /// records, and the complexity-distribution summary, all defaulting to
    /// empty/zero when absent.
    fn normalize_complex_types(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<ComplexTypeAnalysis, NormalizationError> {
        let empty_object = Value::Object(serde_json::Map::new());
        let complex_data = multi_source.get("complex_types").unwrap_or(&empty_object);
        let categorized = complex_data.get("categorized_types");
        let summary = complex_data.get("summary");
        let complexity_dist = summary.and_then(|s| s.get("complexity_distribution"));

        Ok(ComplexTypeAnalysis {
            categorized_types: CategorizedTypes {
                collections: categorized
                    .and_then(|cat| cat.get("collections"))
                    .and_then(|coll| coll.as_array())
                    .cloned()
                    .unwrap_or_default(),
                generic_types: categorized
                    .and_then(|cat| cat.get("generic_types"))
                    .and_then(|gen| gen.as_array())
                    .cloned()
                    .unwrap_or_default(),
                smart_pointers: categorized
                    .and_then(|cat| cat.get("smart_pointers"))
                    .and_then(|smart| smart.as_array())
                    .cloned()
                    .unwrap_or_default(),
                trait_objects: categorized
                    .and_then(|cat| cat.get("trait_objects"))
                    .and_then(|traits| traits.as_array())
                    .cloned()
                    .unwrap_or_default(),
            },
            complex_type_analysis: complex_data
                .get("complex_type_analysis")
                .and_then(|analysis| analysis.as_array())
                .cloned()
                .unwrap_or_default(),
            summary: ComplexTypeSummary {
                total_complex_types: self
                    .extract_usize(summary, "total_complex_types")
                    .unwrap_or(0),
                complexity_distribution: ComplexityDistribution {
                    low_complexity: self
                        .extract_usize(complexity_dist, "low_complexity")
                        .unwrap_or(0),
                    medium_complexity: self
                        .extract_usize(complexity_dist, "medium_complexity")
                        .unwrap_or(0),
                    high_complexity: self
                        .extract_usize(complexity_dist, "high_complexity")
                        .unwrap_or(0),
                    very_high_complexity: self
                        .extract_usize(complexity_dist, "very_high_complexity")
                        .unwrap_or(0),
                },
            },
        })
    }

    /// Extracts relationship data from the `variable_relationships` source;
    /// absent source or sub-objects yield empty collections.
    fn normalize_variable_relationships(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<VariableRelationships, NormalizationError> {
        let empty_object = Value::Object(serde_json::Map::new());
        let var_data = multi_source
            .get("variable_relationships")
            .unwrap_or(&empty_object);

        Ok(VariableRelationships {
            relationships: var_data
                .get("variable_relationships")
                .and_then(|rels| rels.as_array())
                .cloned()
                .unwrap_or_default(),
            registry: var_data
                .get("variable_registry")
                .and_then(|reg| reg.as_object())
                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
                .unwrap_or_default(),
            dependency_graph: var_data
                .get("dependency_graph")
                .and_then(|graph| graph.as_object())
                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
                .unwrap_or_default(),
            scope_hierarchy: var_data
                .get("scope_hierarchy")
                .and_then(|hierarchy| hierarchy.as_object())
                .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
                .unwrap_or_default(),
        })
    }

    /// Builds [`AnalysisMetadata`] from `memory_analysis.metadata`, with
    /// defaults: current Unix time (seconds) for the timestamp, "2.0" for
    /// the export version, and "integrated_analysis" for the analysis type.
    fn normalize_metadata(
        &self,
        multi_source: &HashMap<String, Value>,
    ) -> Result<AnalysisMetadata, NormalizationError> {
        let memory_data = multi_source.get("memory_analysis");
        let metadata = memory_data.and_then(|data| data.get("metadata"));

        Ok(AnalysisMetadata {
            timestamp: self.extract_u64(metadata, "timestamp").unwrap_or_else(|| {
                std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    // Pre-epoch clock yields Duration::default() (0 secs).
                    .unwrap_or_default()
                    .as_secs()
            }),
            export_version: self
                .extract_string(metadata, "export_version")
                .unwrap_or_else(|| "2.0".to_string()),
            analysis_type: self
                .extract_string(metadata, "analysis_type")
                .unwrap_or_else(|| "integrated_analysis".to_string()),
            data_integrity_hash: self.extract_string(metadata, "data_integrity_hash"),
        })
    }

    /// Cross-checks the normalized statistics for internal consistency.
    /// Hard failures (Err): active memory above a non-zero peak, or active
    /// allocations above a non-zero total. A mismatch between the stats
    /// counter and the actual count of never-deallocated records only
    /// prints a warning, since partial sources make that mismatch common.
    fn validate_unified_data(&self, data: &UnifiedMemoryData) -> Result<(), NormalizationError> {
        // peak_memory == 0 means "unknown", so skip the check then.
        if data.stats.active_memory > data.stats.peak_memory && data.stats.peak_memory > 0 {
            return Err(NormalizationError::ValidationError(
                "Active memory cannot exceed peak memory".to_string(),
            ));
        }

        if data.stats.active_allocations > data.stats.total_allocations
            && data.stats.total_allocations > 0
        {
            return Err(NormalizationError::ValidationError(
                "Active allocations cannot exceed total allocations".to_string(),
            ));
        }

        // "Active" here means no deallocation timestamp was recorded.
        let actual_active_count = data
            .allocations
            .iter()
            .filter(|alloc| alloc.timestamp_dealloc.is_none())
            .count();

        if actual_active_count != data.stats.active_allocations && data.stats.active_allocations > 0
        {
            println!(
                "⚠️ Warning: Active allocation count mismatch (stats: {}, actual: {})",
                data.stats.active_allocations, actual_active_count
            );
        }

        println!("✅ Data validation passed");
        Ok(())
    }

    // --- Typed field extractors ------------------------------------------
    // Each returns None when `data` is None, the field is absent, or the
    // field's JSON type does not match.

    /// Reads `field` as a u64 and narrows to usize.
    /// NOTE(review): the `as usize` cast truncates on 32-bit targets for
    /// values above u32::MAX — acceptable if targets are 64-bit only.
    fn extract_usize(&self, data: Option<&Value>, field: &str) -> Option<usize> {
        data?.get(field)?.as_u64().map(|v| v as usize)
    }

    /// Reads `field` as a u64.
    fn extract_u64(&self, data: Option<&Value>, field: &str) -> Option<u64> {
        data?.get(field)?.as_u64()
    }

    /// Reads `field` as an f64.
    fn extract_f64(&self, data: Option<&Value>, field: &str) -> Option<f64> {
        data?.get(field)?.as_f64()
    }

    /// Reads `field` as a bool.
    fn extract_bool(&self, data: Option<&Value>, field: &str) -> Option<bool> {
        data?.get(field)?.as_bool()
    }

    /// Reads `field` as an owned String.
    fn extract_string(&self, data: Option<&Value>, field: &str) -> Option<String> {
        data?.get(field)?.as_str().map(|s| s.to_string())
    }

    /// Reads `field` as an array of strings; returns None if the field is
    /// missing, not an array, or any element is not a string.
    fn extract_string_array(&self, data: Option<&Value>, field: &str) -> Option<Vec<String>> {
        data?
            .get(field)?
            .as_array()?
            .iter()
            .map(|v| v.as_str().map(|s| s.to_string()))
            .collect()
    }
}
759
/// Default construction matches `new()`: validation enabled.
impl Default for DataNormalizer {
    fn default() -> Self {
        Self::new()
    }
}