1use anyhow::Result;
8use std::collections::{HashMap, VecDeque};
9
10use super::types::{
11 ActivationHeatmap, ClusteringResults, DriftInfo, HiddenStateAnalysis, LayerActivationStats,
12 ModelPerformanceMetrics, RepresentationStability, TemporalDynamics,
13};
14
/// Central engine for advanced model analytics: k-means clustering of
/// hidden states, temporal drift detection, and performance-metric
/// correlation tracking.
#[derive(Debug)]
#[allow(dead_code)]
pub struct AdvancedAnalytics {
    /// Thresholds and window sizes governing all analyses.
    config: AnalyticsConfig,
    /// Rolling buffer of hidden-state snapshots, oldest first; trimmed to
    /// `config.max_history_samples` on insert.
    hidden_states_history: VecDeque<HiddenStateData>,
    /// Per-metric value histories keyed by metric name.
    performance_correlations: HashMap<String, CorrelationData>,
    #[allow(dead_code)]
    // NOTE(review): constructed but never read/written in this file.
    temporal_analysis_cache: TemporalAnalysisCache,
    /// Cached clustering results keyed by layer name (unused in this file).
    clustering_results_cache: HashMap<String, ClusteringResults>,
}
31
/// Tunable knobs for history retention, clustering, temporal analysis,
/// and reporting.
#[derive(Debug, Clone)]
pub struct AnalyticsConfig {
    /// Maximum samples retained per history buffer before eviction.
    pub max_history_samples: usize,
    /// Minimum number of points required before clustering is attempted.
    pub min_clustering_samples: usize,
    /// Default requested cluster count — not read in this file (k-means
    /// uses `ClusteringParameters::default()` instead); confirm callers.
    pub default_num_clusters: usize,
    /// Number of snapshots in each drift-comparison window.
    pub temporal_analysis_window: usize,
    /// Distance above which distribution drift is reported.
    pub drift_detection_sensitivity: f64,
    /// Presumably the minimum |r| treated as meaningful — unused here;
    /// confirm at call sites.
    pub correlation_threshold: f64,
    /// Whether visualization artifacts should be produced (unused here).
    pub enable_visualizations: bool,
}
50
/// One recorded snapshot of a layer's hidden activations.
#[derive(Debug, Clone)]
pub struct HiddenStateData {
    /// Layer the snapshot belongs to; used as the grouping key.
    pub layer_name: String,
    /// Row-per-sample matrix of activation vectors.
    pub hidden_states: Vec<Vec<f64>>,
    /// Optional per-sample labels (not consumed in this module).
    pub labels: Option<Vec<String>>,
    /// Wall-clock capture time.
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Training step at capture (not consumed in this module).
    pub training_step: usize,
}
65
/// Rolling value history for a single performance metric.
#[derive(Debug, Clone)]
pub struct CorrelationData {
    /// Metric this history belongs to (duplicates the map key).
    pub metric_name: String,
    /// Recorded samples, oldest first; trimmed to `max_history_samples`.
    pub values: VecDeque<f64>,
    /// Cached correlations to other metrics — never populated in this
    /// file; pairwise correlations are computed on demand instead.
    pub correlations: HashMap<String, f64>,
    /// Time of the most recent recorded sample.
    pub last_updated: chrono::DateTime<chrono::Utc>,
}
78
/// Cache of temporal-analysis outputs keyed by layer name.
/// NOTE(review): only constructed in this file — its maps are never read
/// or written here; confirm intended use before extending.
#[derive(Debug, Clone)]
pub struct TemporalAnalysisCache {
    /// Drift-detection results per layer.
    pub drift_results: HashMap<String, DriftInfo>,
    /// Temporal-consistency scores per layer.
    pub consistency_scores: HashMap<String, f64>,
    /// Stable (start, end) snapshot-index windows per layer.
    pub stability_windows: HashMap<String, Vec<(usize, usize)>>,
    /// Timestamp of the last analysis run (set at construction).
    pub last_analysis: chrono::DateTime<chrono::Utc>,
}
91
/// Knobs for the k-means pass in `perform_clustering_analysis`.
#[derive(Debug, Clone)]
pub struct ClusteringParameters {
    /// Requested cluster count (capped at half the point count).
    pub num_clusters: usize,
    /// Upper bound on Lloyd iterations if assignments keep changing.
    pub max_iterations: usize,
    /// Convergence tolerance — currently unused; convergence is detected
    /// by unchanged assignments instead.
    pub tolerance: f64,
    /// Seed for reproducibility — currently unused; initialization is
    /// deterministic (farthest-point scan starting from the first point).
    pub random_seed: Option<u64>,
    /// Metric used for point-to-center distances.
    pub distance_metric: DistanceMetric,
}
106
/// Distance functions supported by `calculate_distance`.
#[derive(Debug, Clone)]
pub enum DistanceMetric {
    /// Straight-line (L2) distance.
    Euclidean,
    /// Sum of absolute coordinate differences (L1).
    Manhattan,
    /// 1 − cosine similarity (1.0 when either vector has zero norm).
    Cosine,
    /// Generalized Lp distance with exponent `p`.
    Minkowski { p: f64 },
}
119
/// Parameters for dimensionality reduction — declared for future use;
/// nothing in this file consumes them yet.
#[derive(Debug, Clone)]
pub struct DimensionalityReductionParams {
    /// Output dimensionality.
    pub target_dimensions: usize,
    /// Reduction algorithm to apply.
    pub method: ReductionMethod,
    /// Presumably the fraction of variance to preserve — confirm when
    /// this is wired up.
    pub preserve_variance_ratio: f64,
}
130
/// Supported dimensionality-reduction algorithms (not yet wired up).
#[derive(Debug, Clone)]
pub enum ReductionMethod {
    /// Principal component analysis.
    PCA,
    /// t-SNE with the given perplexity.
    TSNE { perplexity: f64 },
    /// UMAP with neighborhood size and minimum embedding distance.
    UMAP { n_neighbors: usize, min_dist: f64 },
}
141
/// Rendering options for analytics visualizations — declared for future
/// use; nothing in this file consumes them yet.
#[derive(Debug, Clone)]
pub struct VisualizationParams {
    /// Output size as (width, height); units unspecified here.
    pub dimensions: (usize, usize),
    /// Palette used to map values to colors.
    pub color_scheme: ColorScheme,
    /// Whether to draw textual annotations.
    pub include_annotations: bool,
    /// File format for the exported artifact.
    pub export_format: ExportFormat,
}
154
/// Color palettes for visualization rendering (not yet consumed here).
#[derive(Debug, Clone)]
pub enum ColorScheme {
    Viridis,
    Plasma,
    Inferno,
    /// Explicit RGB stops; channel range presumably [0, 1] — confirm
    /// against the renderer.
    Custom(Vec<(f64, f64, f64)>),
}
167
/// Export targets for visualization artifacts (not yet consumed here).
#[derive(Debug, Clone)]
pub enum ExportFormat {
    PNG,
    SVG,
    JSON,
    CSV,
}
180
/// Aggregate statistics produced by `perform_statistical_analysis`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, Default)]
pub struct StatisticalAnalysis {
    /// Per-metric means (one entry per non-empty metric history).
    pub means: Vec<f64>,
    /// Per-metric population standard deviations, parallel to `means`.
    pub std_devs: Vec<f64>,
    /// Pairwise Pearson correlations between metrics (unit diagonal).
    pub correlation_matrix: Vec<Vec<f64>>,
    /// PCA loadings — currently placeholder values (all 1.0).
    pub principal_components: Vec<Vec<f64>>,
    /// Variance explained per component — currently uniform placeholders.
    pub explained_variance_ratios: Vec<f64>,
    /// Hypothesis-test results — currently a single placeholder t-test.
    pub significance_tests: Vec<SignificanceTest>,
}
197
/// Result of a single statistical hypothesis test.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SignificanceTest {
    /// Human-readable name of the test performed.
    pub test_name: String,
    /// The computed test statistic.
    pub statistic: f64,
    /// Probability of the statistic under the null hypothesis.
    pub p_value: f64,
    /// Degrees of freedom, when the test defines them.
    pub degrees_of_freedom: Option<usize>,
    /// Optional (low, high) confidence interval.
    pub confidence_interval: Option<(f64, f64)>,
}
212
/// Output of an anomaly-detection pass over recorded metric values.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AnomalyDetectionResults {
    /// Samples whose value exceeded the detection threshold.
    pub anomalies: Vec<Anomaly>,
    /// Anomaly score for every sample, in input order.
    pub anomaly_scores: Vec<f64>,
    /// Value cutoff above which a sample is flagged.
    pub threshold: f64,
    /// Algorithm that produced these results.
    pub method: AnomalyDetectionMethod,
}
225
/// A single flagged sample.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Anomaly {
    /// Position of the sample in the flattened value sequence.
    pub index: usize,
    /// Anomaly score (|value − mean| / σ for the statistical method).
    pub score: f64,
    /// When the anomaly was detected (not when the sample was recorded).
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Free-form metadata; currently always empty.
    pub context: HashMap<String, String>,
}
238
/// Anomaly-detection algorithms; only `StatisticalThreshold` is
/// implemented in this file — the others are declared for future use.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub enum AnomalyDetectionMethod {
    /// Isolation forest with the given ensemble size.
    IsolationForest { n_trees: usize },
    /// Local outlier factor over `n_neighbors`.
    LocalOutlierFactor { n_neighbors: usize },
    /// One-class SVM with margin parameter `nu`.
    OneClassSVM { nu: f64 },
    /// Flag values beyond `n_std` standard deviations from the mean.
    StatisticalThreshold { n_std: f64 },
}
251
252impl Default for AnalyticsConfig {
253 fn default() -> Self {
254 Self {
255 max_history_samples: 10000,
256 min_clustering_samples: 50,
257 default_num_clusters: 8,
258 temporal_analysis_window: 100,
259 drift_detection_sensitivity: 0.05,
260 correlation_threshold: 0.7,
261 enable_visualizations: true,
262 }
263 }
264}
265
266impl Default for ClusteringParameters {
267 fn default() -> Self {
268 Self {
269 num_clusters: 8,
270 max_iterations: 100,
271 tolerance: 1e-4,
272 random_seed: Some(42),
273 distance_metric: DistanceMetric::Euclidean,
274 }
275 }
276}
277
278impl AdvancedAnalytics {
279 pub fn new() -> Self {
281 Self {
282 config: AnalyticsConfig::default(),
283 hidden_states_history: VecDeque::new(),
284 performance_correlations: HashMap::new(),
285 temporal_analysis_cache: TemporalAnalysisCache::new(),
286 clustering_results_cache: HashMap::new(),
287 }
288 }
289
290 pub fn with_config(config: AnalyticsConfig) -> Self {
292 Self {
293 config,
294 hidden_states_history: VecDeque::new(),
295 performance_correlations: HashMap::new(),
296 temporal_analysis_cache: TemporalAnalysisCache::new(),
297 clustering_results_cache: HashMap::new(),
298 }
299 }
300
301 pub fn record_hidden_states(&mut self, hidden_states: HiddenStateData) {
303 self.hidden_states_history.push_back(hidden_states);
304
305 while self.hidden_states_history.len() > self.config.max_history_samples {
306 self.hidden_states_history.pop_front();
307 }
308 }
309
310 pub fn record_performance_metrics(&mut self, metrics: &ModelPerformanceMetrics) {
312 self.update_correlation_data("loss", metrics.loss);
313 self.update_correlation_data("throughput", metrics.throughput_samples_per_sec);
314 self.update_correlation_data("memory_usage", metrics.memory_usage_mb);
315
316 if let Some(accuracy) = metrics.accuracy {
317 self.update_correlation_data("accuracy", accuracy);
318 }
319
320 if let Some(gpu_util) = metrics.gpu_utilization {
321 self.update_correlation_data("gpu_utilization", gpu_util);
322 }
323 }
324
325 pub fn analyze_hidden_states(&self, layer_name: &str) -> Result<HiddenStateAnalysis> {
327 let layer_data: Vec<_> = self
328 .hidden_states_history
329 .iter()
330 .filter(|data| data.layer_name == layer_name)
331 .collect();
332
333 if layer_data.is_empty() {
334 return Err(anyhow::anyhow!(
335 "No hidden state data available for layer: {}",
336 layer_name
337 ));
338 }
339
340 let all_states: Vec<Vec<f64>> =
342 layer_data.iter().flat_map(|data| data.hidden_states.iter()).cloned().collect();
343
344 if all_states.is_empty() {
345 return Err(anyhow::anyhow!(
346 "No hidden states found for layer: {}",
347 layer_name
348 ));
349 }
350
351 let dimensionality = all_states[0].len();
352
353 let clustering_results = self.perform_clustering_analysis(&all_states)?;
355
356 let temporal_dynamics = self.analyze_temporal_dynamics(&layer_data)?;
358
359 let representation_stability = self.assess_representation_stability(&all_states)?;
361
362 let information_content = self.calculate_information_content(&all_states)?;
364
365 Ok(HiddenStateAnalysis {
366 dimensionality,
367 information_content,
368 clustering_results,
369 temporal_dynamics,
370 representation_stability,
371 })
372 }
373
374 pub fn perform_clustering_analysis(&self, data: &[Vec<f64>]) -> Result<ClusteringResults> {
376 if data.len() < self.config.min_clustering_samples {
377 return Err(anyhow::anyhow!("Insufficient data for clustering analysis"));
378 }
379
380 let params = ClusteringParameters::default();
381 let num_clusters = params.num_clusters.min(data.len() / 2);
382
383 let mut cluster_centers = self.initialize_cluster_centers(data, num_clusters)?;
385 let mut cluster_assignments = vec![0; data.len()];
386 #[allow(dead_code)]
387 #[allow(unused_assignments)]
388 let mut _converged = false;
389
390 for _iteration in 0..params.max_iterations {
391 let mut new_assignments = vec![0; data.len()];
393 for (i, point) in data.iter().enumerate() {
394 let mut best_distance = f64::INFINITY;
395 let mut best_cluster = 0;
396
397 for (j, center) in cluster_centers.iter().enumerate() {
398 let distance =
399 self.calculate_distance(point, center, ¶ms.distance_metric)?;
400 if distance < best_distance {
401 best_distance = distance;
402 best_cluster = j;
403 }
404 }
405 new_assignments[i] = best_cluster;
406 }
407
408 if new_assignments == cluster_assignments {
410 _converged = true;
411 break;
412 }
413 cluster_assignments = new_assignments;
414
415 cluster_centers =
417 self.update_cluster_centers(data, &cluster_assignments, num_clusters)?;
418 }
419
420 let silhouette_score =
422 self.calculate_silhouette_score(data, &cluster_assignments, &cluster_centers)?;
423
424 let inertia = self.calculate_inertia(data, &cluster_assignments, &cluster_centers)?;
426
427 Ok(ClusteringResults {
428 num_clusters,
429 cluster_centers,
430 cluster_assignments,
431 silhouette_score,
432 inertia,
433 })
434 }
435
436 pub fn analyze_temporal_dynamics(
438 &self,
439 layer_data: &[&HiddenStateData],
440 ) -> Result<TemporalDynamics> {
441 if layer_data.len() < 2 {
442 return Err(anyhow::anyhow!("Insufficient temporal data"));
443 }
444
445 let temporal_consistency = self.calculate_temporal_consistency(layer_data)?;
447
448 let change_rate = self.calculate_change_rate(layer_data)?;
450
451 let stability_windows = self.identify_stability_windows(layer_data)?;
453
454 let drift_detection = self.detect_distribution_drift(layer_data)?;
456
457 Ok(TemporalDynamics {
458 temporal_consistency,
459 change_rate,
460 stability_windows,
461 drift_detection,
462 })
463 }
464
465 pub fn assess_representation_stability(
467 &self,
468 hidden_states: &[Vec<f64>],
469 ) -> Result<RepresentationStability> {
470 if hidden_states.is_empty() {
471 return Err(anyhow::anyhow!("No hidden states provided"));
472 }
473
474 let stability_score = self.calculate_stability_score(hidden_states)?;
476
477 let variance_across_batches = self.calculate_batch_variance(hidden_states)?;
479
480 let consistency_measure = self.calculate_consistency_measure(hidden_states)?;
482
483 let robustness_to_noise = self.assess_noise_robustness(hidden_states)?;
485
486 Ok(RepresentationStability {
487 stability_score,
488 variance_across_batches,
489 consistency_measure,
490 robustness_to_noise,
491 })
492 }
493
494 pub fn generate_activation_heatmap(
496 &self,
497 layer_stats: &[LayerActivationStats],
498 ) -> Result<ActivationHeatmap> {
499 if layer_stats.is_empty() {
500 return Err(anyhow::anyhow!("No layer statistics provided"));
501 }
502
503 let mut data = Vec::new();
505 let mut min_val = f64::INFINITY;
506 let mut max_val = f64::NEG_INFINITY;
507
508 for stats in layer_stats {
509 let row = vec![
510 stats.mean_activation,
511 stats.std_activation,
512 stats.min_activation,
513 stats.max_activation,
514 stats.dead_neurons_ratio,
515 stats.saturated_neurons_ratio,
516 stats.sparsity,
517 ];
518
519 for &val in &row {
520 min_val = min_val.min(val);
521 max_val = max_val.max(val);
522 }
523
524 data.push(row);
525 }
526
527 let dimensions = (data.len(), data.first().map_or(0, |row| row.len()));
528
529 Ok(ActivationHeatmap {
530 data,
531 dimensions,
532 value_range: (min_val, max_val),
533 interpretation: "Activation statistics heatmap showing layer behavior patterns"
534 .to_string(),
535 })
536 }
537
538 pub fn detect_performance_anomalies(&self) -> Result<AnomalyDetectionResults> {
540 let mut all_values = Vec::new();
542 for correlation_data in self.performance_correlations.values() {
543 all_values.extend(correlation_data.values.iter().cloned());
544 }
545
546 if all_values.is_empty() {
547 return Err(anyhow::anyhow!("No performance data available"));
548 }
549
550 let method = AnomalyDetectionMethod::StatisticalThreshold { n_std: 2.0 };
552
553 let mean = all_values.iter().sum::<f64>() / all_values.len() as f64;
554 let variance =
555 all_values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / all_values.len() as f64;
556 let std_dev = variance.sqrt();
557
558 let threshold = mean + 2.0 * std_dev;
559
560 let mut anomalies = Vec::new();
561 let mut anomaly_scores = Vec::new();
562
563 for (i, &value) in all_values.iter().enumerate() {
564 let score = (value - mean).abs() / std_dev;
565 anomaly_scores.push(score);
566
567 if value > threshold {
568 anomalies.push(Anomaly {
569 index: i,
570 score,
571 timestamp: chrono::Utc::now(),
572 context: HashMap::new(),
573 });
574 }
575 }
576
577 Ok(AnomalyDetectionResults {
578 anomalies,
579 anomaly_scores,
580 threshold,
581 method,
582 })
583 }
584
585 pub fn calculate_correlation_matrix(&self) -> Result<Vec<Vec<f64>>> {
587 let metric_names: Vec<_> = self.performance_correlations.keys().cloned().collect();
588 let n_metrics = metric_names.len();
589
590 if n_metrics == 0 {
591 return Err(anyhow::anyhow!(
592 "No metrics available for correlation analysis"
593 ));
594 }
595
596 let mut correlation_matrix = vec![vec![0.0; n_metrics]; n_metrics];
597
598 for (i, metric1) in metric_names.iter().enumerate() {
599 for (j, metric2) in metric_names.iter().enumerate() {
600 if i == j {
601 correlation_matrix[i][j] = 1.0;
602 } else {
603 let correlation = self.calculate_correlation(metric1, metric2)?;
604 correlation_matrix[i][j] = correlation;
605 }
606 }
607 }
608
609 Ok(correlation_matrix)
610 }
611
612 pub fn perform_statistical_analysis(&self) -> Result<StatisticalAnalysis> {
614 if self.performance_correlations.is_empty() {
615 return Err(anyhow::anyhow!(
616 "No data available for statistical analysis"
617 ));
618 }
619
620 let mut means = Vec::new();
622 let mut std_devs = Vec::new();
623
624 for correlation_data in self.performance_correlations.values() {
625 let values: Vec<f64> = correlation_data.values.iter().cloned().collect();
626 if !values.is_empty() {
627 let mean = values.iter().sum::<f64>() / values.len() as f64;
628 let variance =
629 values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / values.len() as f64;
630 let std_dev = variance.sqrt();
631
632 means.push(mean);
633 std_devs.push(std_dev);
634 }
635 }
636
637 let correlation_matrix = self.calculate_correlation_matrix()?;
639
640 let principal_components = vec![vec![1.0; means.len()]; means.len()];
642 let explained_variance_ratios = vec![1.0 / means.len() as f64; means.len()];
643
644 let significance_tests = vec![SignificanceTest {
646 test_name: "Sample t-test".to_string(),
647 statistic: 1.0,
648 p_value: 0.05,
649 degrees_of_freedom: Some(means.len() - 1),
650 confidence_interval: Some((0.0, 1.0)),
651 }];
652
653 Ok(StatisticalAnalysis {
654 means,
655 std_devs,
656 correlation_matrix,
657 principal_components,
658 explained_variance_ratios,
659 significance_tests,
660 })
661 }
662
663 pub fn generate_analytics_report(&self) -> Result<AnalyticsReport> {
665 let correlation_matrix = self.calculate_correlation_matrix().unwrap_or_default();
666 let statistical_analysis = self.perform_statistical_analysis().unwrap_or_default();
667 let anomaly_detection = self.detect_performance_anomalies().unwrap_or_default();
668
669 let mut layer_analyses = HashMap::new();
671 let unique_layers: std::collections::HashSet<String> =
672 self.hidden_states_history.iter().map(|data| data.layer_name.clone()).collect();
673
674 for layer_name in unique_layers {
675 if let Ok(analysis) = self.analyze_hidden_states(&layer_name) {
676 layer_analyses.insert(layer_name, analysis);
677 }
678 }
679
680 Ok(AnalyticsReport {
681 correlation_matrix,
682 statistical_analysis,
683 layer_analyses,
684 anomaly_detection,
685 temporal_summary: self.generate_temporal_summary(),
686 recommendations: self.generate_analytics_recommendations(),
687 })
688 }
689
690 fn update_correlation_data(&mut self, metric_name: &str, value: f64) {
694 let correlation_data = self
695 .performance_correlations
696 .entry(metric_name.to_string())
697 .or_insert_with(|| CorrelationData {
698 metric_name: metric_name.to_string(),
699 values: VecDeque::new(),
700 correlations: HashMap::new(),
701 last_updated: chrono::Utc::now(),
702 });
703
704 correlation_data.values.push_back(value);
705 correlation_data.last_updated = chrono::Utc::now();
706
707 while correlation_data.values.len() > self.config.max_history_samples {
709 correlation_data.values.pop_front();
710 }
711 }
712
713 fn initialize_cluster_centers(
715 &self,
716 data: &[Vec<f64>],
717 num_clusters: usize,
718 ) -> Result<Vec<Vec<f64>>> {
719 if data.is_empty() || num_clusters == 0 {
720 return Err(anyhow::anyhow!("Invalid input for cluster initialization"));
721 }
722
723 let mut centers = Vec::new();
724 let _dimensions = data[0].len();
725
726 centers.push(data[0].clone());
728
729 for _ in 1..num_clusters {
731 if centers.len() >= data.len() {
732 break;
733 }
734
735 let mut best_distance = 0.0;
736 let mut best_point = data[0].clone();
737
738 for point in data {
739 let mut min_distance = f64::INFINITY;
740 for center in ¢ers {
741 let distance =
742 self.calculate_distance(point, center, &DistanceMetric::Euclidean)?;
743 min_distance = min_distance.min(distance);
744 }
745
746 if min_distance > best_distance {
747 best_distance = min_distance;
748 best_point = point.clone();
749 }
750 }
751
752 centers.push(best_point);
753 }
754
755 Ok(centers)
756 }
757
758 fn calculate_distance(
760 &self,
761 point1: &[f64],
762 point2: &[f64],
763 metric: &DistanceMetric,
764 ) -> Result<f64> {
765 if point1.len() != point2.len() {
766 return Err(anyhow::anyhow!("Points must have same dimensionality"));
767 }
768
769 match metric {
770 DistanceMetric::Euclidean => {
771 let sum_squared =
772 point1.iter().zip(point2.iter()).map(|(a, b)| (a - b).powi(2)).sum::<f64>();
773 Ok(sum_squared.sqrt())
774 },
775 DistanceMetric::Manhattan => {
776 let sum_abs =
777 point1.iter().zip(point2.iter()).map(|(a, b)| (a - b).abs()).sum::<f64>();
778 Ok(sum_abs)
779 },
780 DistanceMetric::Cosine => {
781 let dot_product = point1.iter().zip(point2.iter()).map(|(a, b)| a * b).sum::<f64>();
782 let norm1 = point1.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
783 let norm2 = point2.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();
784
785 if norm1 == 0.0 || norm2 == 0.0 {
786 Ok(1.0)
787 } else {
788 Ok(1.0 - (dot_product / (norm1 * norm2)))
789 }
790 },
791 DistanceMetric::Minkowski { p } => {
792 let sum_powered = point1
793 .iter()
794 .zip(point2.iter())
795 .map(|(a, b)| (a - b).abs().powf(*p))
796 .sum::<f64>();
797 Ok(sum_powered.powf(1.0 / p))
798 },
799 }
800 }
801
802 fn update_cluster_centers(
804 &self,
805 data: &[Vec<f64>],
806 assignments: &[usize],
807 num_clusters: usize,
808 ) -> Result<Vec<Vec<f64>>> {
809 let dimensions = data[0].len();
810 let mut new_centers = vec![vec![0.0; dimensions]; num_clusters];
811 let mut cluster_counts = vec![0; num_clusters];
812
813 for (point, &cluster_id) in data.iter().zip(assignments.iter()) {
815 if cluster_id < num_clusters {
816 for (i, &value) in point.iter().enumerate() {
817 new_centers[cluster_id][i] += value;
818 }
819 cluster_counts[cluster_id] += 1;
820 }
821 }
822
823 for (cluster_id, count) in cluster_counts.iter().enumerate() {
825 if *count > 0 {
826 for value in &mut new_centers[cluster_id] {
827 *value /= *count as f64;
828 }
829 }
830 }
831
832 Ok(new_centers)
833 }
834
    /// Mean silhouette coefficient over all points. For each point,
    /// a = average intra-cluster distance and b = smallest average
    /// distance to any other cluster; the per-point score is
    /// (b − a) / max(a, b). O(n²) in the number of points.
    fn calculate_silhouette_score(
        &self,
        data: &[Vec<f64>],
        assignments: &[usize],
        centers: &[Vec<f64>],
    ) -> Result<f64> {
        if data.is_empty() {
            return Ok(0.0);
        }

        let mut total_score = 0.0;
        let mut valid_points = 0;

        for (i, point) in data.iter().enumerate() {
            let cluster_id = assignments[i];

            // a: average distance to the other members of this point's cluster.
            let mut same_cluster_distances = Vec::new();
            for (j, other_point) in data.iter().enumerate() {
                if i != j && assignments[j] == cluster_id {
                    let distance =
                        self.calculate_distance(point, other_point, &DistanceMetric::Euclidean)?;
                    same_cluster_distances.push(distance);
                }
            }

            let a = if same_cluster_distances.is_empty() {
                0.0
            } else {
                same_cluster_distances.iter().sum::<f64>() / same_cluster_distances.len() as f64
            };

            // b: smallest average distance to the members of any other cluster.
            let mut min_other_cluster_distance = f64::INFINITY;
            for (other_cluster_id, _) in centers.iter().enumerate() {
                if other_cluster_id != cluster_id {
                    let mut other_cluster_distances = Vec::new();
                    for (j, other_point) in data.iter().enumerate() {
                        if assignments[j] == other_cluster_id {
                            let distance = self.calculate_distance(
                                point,
                                other_point,
                                &DistanceMetric::Euclidean,
                            )?;
                            other_cluster_distances.push(distance);
                        }
                    }

                    if !other_cluster_distances.is_empty() {
                        let avg_distance = other_cluster_distances.iter().sum::<f64>()
                            / other_cluster_distances.len() as f64;
                        min_other_cluster_distance = min_other_cluster_distance.min(avg_distance);
                    }
                }
            }

            let b = min_other_cluster_distance;

            // (b − a) / max(a, b), written out per case; a == b contributes 0.
            if a < b {
                total_score += (b - a) / b;
            } else if a > b {
                total_score += (b - a) / a;
            }
            valid_points += 1;
        }

        Ok(if valid_points > 0 { total_score / valid_points as f64 } else { 0.0 })
    }
906
907 fn calculate_inertia(
909 &self,
910 data: &[Vec<f64>],
911 assignments: &[usize],
912 centers: &[Vec<f64>],
913 ) -> Result<f64> {
914 let mut inertia = 0.0;
915
916 for (point, &cluster_id) in data.iter().zip(assignments.iter()) {
917 if cluster_id < centers.len() {
918 let distance = self.calculate_distance(
919 point,
920 ¢ers[cluster_id],
921 &DistanceMetric::Euclidean,
922 )?;
923 inertia += distance.powi(2);
924 }
925 }
926
927 Ok(inertia)
928 }
929
930 fn calculate_information_content(&self, hidden_states: &[Vec<f64>]) -> Result<f64> {
932 if hidden_states.is_empty() {
933 return Ok(0.0);
934 }
935
936 let dimensions = hidden_states[0].len();
937 let mut total_variance = 0.0;
938
939 for dim in 0..dimensions {
940 let values: Vec<f64> = hidden_states.iter().map(|state| state[dim]).collect();
941 if values.len() > 1 {
942 let mean = values.iter().sum::<f64>() / values.len() as f64;
943 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
944 / (values.len() - 1) as f64;
945 total_variance += variance;
946 }
947 }
948
949 Ok(total_variance / dimensions as f64)
951 }
952
953 fn calculate_temporal_consistency(&self, layer_data: &[&HiddenStateData]) -> Result<f64> {
955 if layer_data.len() < 2 {
956 return Ok(1.0);
957 }
958
959 let mut consistency_scores = Vec::new();
960
961 for i in 1..layer_data.len() {
962 let prev_states = &layer_data[i - 1].hidden_states;
963 let curr_states = &layer_data[i].hidden_states;
964
965 if !prev_states.is_empty() && !curr_states.is_empty() {
966 let prev_mean = self.calculate_mean_state(prev_states);
968 let curr_mean = self.calculate_mean_state(curr_states);
969
970 if prev_mean.len() == curr_mean.len() {
971 let distance = self.calculate_distance(
972 &prev_mean,
973 &curr_mean,
974 &DistanceMetric::Euclidean,
975 )?;
976 consistency_scores.push(1.0 / (1.0 + distance));
977 }
978 }
979 }
980
981 Ok(if consistency_scores.is_empty() {
982 1.0
983 } else {
984 consistency_scores.iter().sum::<f64>() / consistency_scores.len() as f64
985 })
986 }
987
988 fn calculate_mean_state(&self, states: &[Vec<f64>]) -> Vec<f64> {
990 if states.is_empty() {
991 return Vec::new();
992 }
993
994 let dimensions = states[0].len();
995 let mut mean_state = vec![0.0; dimensions];
996
997 for state in states {
998 for (i, &value) in state.iter().enumerate() {
999 if i < dimensions {
1000 mean_state[i] += value;
1001 }
1002 }
1003 }
1004
1005 for value in &mut mean_state {
1006 *value /= states.len() as f64;
1007 }
1008
1009 mean_state
1010 }
1011
1012 fn calculate_change_rate(&self, layer_data: &[&HiddenStateData]) -> Result<f64> {
1014 if layer_data.len() < 2 {
1015 return Ok(0.0);
1016 }
1017
1018 let mut total_change = 0.0;
1019 let mut valid_comparisons = 0;
1020
1021 for i in 1..layer_data.len() {
1022 let prev_mean = self.calculate_mean_state(&layer_data[i - 1].hidden_states);
1023 let curr_mean = self.calculate_mean_state(&layer_data[i].hidden_states);
1024
1025 if !prev_mean.is_empty() && !curr_mean.is_empty() && prev_mean.len() == curr_mean.len()
1026 {
1027 let change =
1028 self.calculate_distance(&prev_mean, &curr_mean, &DistanceMetric::Euclidean)?;
1029 total_change += change;
1030 valid_comparisons += 1;
1031 }
1032 }
1033
1034 Ok(if valid_comparisons > 0 {
1035 total_change / valid_comparisons as f64
1036 } else {
1037 0.0
1038 })
1039 }
1040
1041 fn identify_stability_windows(
1043 &self,
1044 layer_data: &[&HiddenStateData],
1045 ) -> Result<Vec<(usize, usize)>> {
1046 if layer_data.len() < 3 {
1047 return Ok(Vec::new());
1048 }
1049
1050 let mut stability_windows = Vec::new();
1051 let mut window_start = 0;
1052 let stability_threshold = 0.1; for i in 1..layer_data.len() {
1055 let prev_mean = self.calculate_mean_state(&layer_data[i - 1].hidden_states);
1056 let curr_mean = self.calculate_mean_state(&layer_data[i].hidden_states);
1057
1058 if !prev_mean.is_empty() && !curr_mean.is_empty() && prev_mean.len() == curr_mean.len()
1059 {
1060 let change = self
1061 .calculate_distance(&prev_mean, &curr_mean, &DistanceMetric::Euclidean)
1062 .unwrap_or(f64::INFINITY);
1063
1064 if change > stability_threshold {
1065 if i - window_start > 2 {
1067 stability_windows.push((window_start, i - 1));
1068 }
1069 window_start = i;
1070 }
1071 }
1072 }
1073
1074 if layer_data.len() - window_start > 2 {
1076 stability_windows.push((window_start, layer_data.len() - 1));
1077 }
1078
1079 Ok(stability_windows)
1080 }
1081
    /// Compares the aggregated mean of an early snapshot window against a
    /// late window; drift is declared when their Euclidean distance
    /// exceeds `config.drift_detection_sensitivity`.
    fn detect_distribution_drift(&self, layer_data: &[&HiddenStateData]) -> Result<DriftInfo> {
        // Not enough history to form both windows: report "no drift".
        if layer_data.len() < self.config.temporal_analysis_window {
            return Ok(DriftInfo {
                drift_detected: false,
                drift_magnitude: 0.0,
                drift_direction: "unknown".to_string(),
                onset_step: None,
            });
        }

        let window_size = self.config.temporal_analysis_window;
        let mid_point = layer_data.len() / 2;

        // Early window: first `window_size` snapshots, clamped to the first
        // half. Late window: last `window_size` snapshots, clamped to the
        // second half. `len >= window_size` holds here, so the subtraction
        // cannot underflow.
        let early_data = &layer_data[0..window_size.min(mid_point)];
        let late_data = &layer_data[mid_point.max(layer_data.len() - window_size)..];

        let early_mean = self.calculate_aggregated_mean(early_data);
        let late_mean = self.calculate_aggregated_mean(late_data);

        if early_mean.len() == late_mean.len() && !early_mean.is_empty() {
            let drift_magnitude =
                self.calculate_distance(&early_mean, &late_mean, &DistanceMetric::Euclidean)?;
            let drift_detected = drift_magnitude > self.config.drift_detection_sensitivity;

            Ok(DriftInfo {
                drift_detected,
                drift_magnitude,
                drift_direction: if drift_detected {
                    "forward".to_string()
                } else {
                    "stable".to_string()
                },
                onset_step: if drift_detected { Some(mid_point) } else { None },
            })
        } else {
            // Mismatched or empty means: the distributions cannot be compared.
            Ok(DriftInfo {
                drift_detected: false,
                drift_magnitude: 0.0,
                drift_direction: "unknown".to_string(),
                onset_step: None,
            })
        }
    }
1127
1128 fn calculate_aggregated_mean(&self, layer_data: &[&HiddenStateData]) -> Vec<f64> {
1130 let all_states: Vec<Vec<f64>> =
1131 layer_data.iter().flat_map(|data| data.hidden_states.iter()).cloned().collect();
1132
1133 self.calculate_mean_state(&all_states)
1134 }
1135
1136 fn calculate_stability_score(&self, hidden_states: &[Vec<f64>]) -> Result<f64> {
1138 if hidden_states.len() < 2 {
1139 return Ok(1.0);
1140 }
1141
1142 let mut stability_scores = Vec::new();
1143 let window_size = (hidden_states.len() / 10).max(2);
1144
1145 for i in window_size..hidden_states.len() {
1146 let current_window = &hidden_states[i - window_size..i];
1147 let mean_current = self.calculate_mean_state(current_window);
1148
1149 if i >= 2 * window_size {
1150 let prev_window = &hidden_states[i - 2 * window_size..i - window_size];
1151 let mean_prev = self.calculate_mean_state(prev_window);
1152
1153 if mean_current.len() == mean_prev.len() && !mean_current.is_empty() {
1154 let distance = self.calculate_distance(
1155 &mean_current,
1156 &mean_prev,
1157 &DistanceMetric::Euclidean,
1158 )?;
1159 stability_scores.push(1.0 / (1.0 + distance));
1160 }
1161 }
1162 }
1163
1164 Ok(if stability_scores.is_empty() {
1165 1.0
1166 } else {
1167 stability_scores.iter().sum::<f64>() / stability_scores.len() as f64
1168 })
1169 }
1170
1171 fn calculate_batch_variance(&self, hidden_states: &[Vec<f64>]) -> Result<f64> {
1173 if hidden_states.is_empty() {
1174 return Ok(0.0);
1175 }
1176
1177 let dimensions = hidden_states[0].len();
1178 let mut total_variance = 0.0;
1179
1180 for dim in 0..dimensions {
1181 let values: Vec<f64> = hidden_states.iter().map(|state| state[dim]).collect();
1182 if values.len() > 1 {
1183 let mean = values.iter().sum::<f64>() / values.len() as f64;
1184 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
1185 / (values.len() - 1) as f64;
1186 total_variance += variance;
1187 }
1188 }
1189
1190 Ok(total_variance / dimensions as f64)
1191 }
1192
1193 fn calculate_consistency_measure(&self, hidden_states: &[Vec<f64>]) -> Result<f64> {
1195 if hidden_states.len() < 2 {
1196 return Ok(1.0);
1197 }
1198
1199 let mut similarities = Vec::new();
1201 let sample_size = hidden_states.len().min(100); for i in 0..sample_size {
1204 for j in (i + 1)..sample_size {
1205 let distance = self.calculate_distance(
1206 &hidden_states[i],
1207 &hidden_states[j],
1208 &DistanceMetric::Cosine,
1209 )?;
1210 similarities.push(1.0 - distance); }
1212 }
1213
1214 Ok(if similarities.is_empty() {
1215 1.0
1216 } else {
1217 similarities.iter().sum::<f64>() / similarities.len() as f64
1218 })
1219 }
1220
1221 fn assess_noise_robustness(&self, hidden_states: &[Vec<f64>]) -> Result<f64> {
1223 self.calculate_batch_variance(hidden_states).map(|variance| {
1225 1.0 / (1.0 + variance)
1227 })
1228 }
1229
1230 fn calculate_correlation(&self, metric1: &str, metric2: &str) -> Result<f64> {
1232 let data1 = self
1233 .performance_correlations
1234 .get(metric1)
1235 .ok_or_else(|| anyhow::anyhow!("Metric {} not found", metric1))?;
1236
1237 let data2 = self
1238 .performance_correlations
1239 .get(metric2)
1240 .ok_or_else(|| anyhow::anyhow!("Metric {} not found", metric2))?;
1241
1242 let values1: Vec<f64> = data1.values.iter().cloned().collect();
1243 let values2: Vec<f64> = data2.values.iter().cloned().collect();
1244
1245 if values1.len() != values2.len() || values1.is_empty() {
1246 return Ok(0.0);
1247 }
1248
1249 let mean1 = values1.iter().sum::<f64>() / values1.len() as f64;
1250 let mean2 = values2.iter().sum::<f64>() / values2.len() as f64;
1251
1252 let numerator: f64 = values1
1253 .iter()
1254 .zip(values2.iter())
1255 .map(|(x1, x2)| (x1 - mean1) * (x2 - mean2))
1256 .sum();
1257
1258 let var1: f64 = values1.iter().map(|x| (x - mean1).powi(2)).sum();
1259 let var2: f64 = values2.iter().map(|x| (x - mean2).powi(2)).sum();
1260
1261 let denominator = (var1 * var2).sqrt();
1262
1263 Ok(if denominator == 0.0 { 0.0 } else { numerator / denominator })
1264 }
1265
1266 fn generate_temporal_summary(&self) -> String {
1268 format!(
1269 "Temporal analysis: {} hidden state samples collected across {} layers. \
1270 Average stability observed with {} correlation metrics tracked.",
1271 self.hidden_states_history.len(),
1272 self.hidden_states_history
1273 .iter()
1274 .map(|data| &data.layer_name)
1275 .collect::<std::collections::HashSet<_>>()
1276 .len(),
1277 self.performance_correlations.len()
1278 )
1279 }
1280
1281 fn generate_analytics_recommendations(&self) -> Vec<String> {
1283 let mut recommendations = Vec::new();
1284
1285 if self.performance_correlations.len() < 3 {
1286 recommendations.push(
1287 "Collect more performance metrics for comprehensive correlation analysis"
1288 .to_string(),
1289 );
1290 }
1291
1292 if self.hidden_states_history.len() < 50 {
1293 recommendations
1294 .push("Increase hidden state sampling for better temporal analysis".to_string());
1295 }
1296
1297 recommendations
1298 .push("Consider implementing automated anomaly detection alerts".to_string());
1299 recommendations.push("Enable advanced visualization for better insights".to_string());
1300
1301 recommendations
1302 }
1303}
1304
/// Bundle of every analysis output, suitable for serialization.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AnalyticsReport {
    /// Pairwise Pearson correlations between tracked metrics.
    pub correlation_matrix: Vec<Vec<f64>>,
    /// Summary statistics over the metric histories.
    pub statistical_analysis: StatisticalAnalysis,
    /// Per-layer hidden-state analyses (layers whose analysis failed are
    /// omitted).
    pub layer_analyses: HashMap<String, HiddenStateAnalysis>,
    /// Flagged performance anomalies.
    pub anomaly_detection: AnomalyDetectionResults,
    /// Human-readable one-line summary.
    pub temporal_summary: String,
    /// Operator-facing tuning suggestions.
    pub recommendations: Vec<String>,
}
1321
1322impl TemporalAnalysisCache {
1323 fn new() -> Self {
1325 Self {
1326 drift_results: HashMap::new(),
1327 consistency_scores: HashMap::new(),
1328 stability_windows: HashMap::new(),
1329 last_analysis: chrono::Utc::now(),
1330 }
1331 }
1332}
1333
1334impl Default for AnomalyDetectionResults {
1335 fn default() -> Self {
1336 Self {
1337 anomalies: Vec::new(),
1338 anomaly_scores: Vec::new(),
1339 threshold: 0.0,
1340 method: AnomalyDetectionMethod::StatisticalThreshold { n_std: 2.0 },
1341 }
1342 }
1343}
1344
1345impl Default for AdvancedAnalytics {
1346 fn default() -> Self {
1347 Self::new()
1348 }
1349}
1350
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh engine starts with empty histories.
    #[test]
    fn test_advanced_analytics_creation() {
        let analytics = AdvancedAnalytics::new();
        assert!(analytics.hidden_states_history.is_empty());
        assert!(analytics.performance_correlations.is_empty());
    }

    /// Distinct points have strictly positive Euclidean distance.
    #[test]
    fn test_distance_calculation() {
        let analytics = AdvancedAnalytics::new();
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        let d = analytics
            .calculate_distance(&a, &b, &DistanceMetric::Euclidean)
            .unwrap();
        assert!(d > 0.0);
    }

    /// Default clustering parameters match the documented values.
    #[test]
    fn test_clustering_parameters() {
        let ClusteringParameters { num_clusters, max_iterations, .. } =
            ClusteringParameters::default();
        assert_eq!(num_clusters, 8);
        assert_eq!(max_iterations, 100);
    }

    /// Recording samples creates one correlation entry per metric name.
    #[test]
    fn test_correlation_calculation() {
        let mut analytics = AdvancedAnalytics::new();
        let samples = [
            ("metric1", 1.0),
            ("metric1", 2.0),
            ("metric2", 3.0),
            ("metric2", 4.0),
        ];
        for &(metric, value) in samples.iter() {
            analytics.update_correlation_data(metric, value);
        }

        assert_eq!(analytics.performance_correlations.len(), 2);
    }
}