1use super::config::PerformanceConfig;
4use super::meta_learning::MetaLearningResult;
5use super::TrainingMetrics;
6use crate::error::Result;
7use scirs2_core::ndarray::{Array1, Array2};
8use scirs2_core::numeric::Float;
9use serde::{Deserialize, Serialize};
10use std::collections::{BTreeMap, HashMap, VecDeque};
11use std::fmt::Debug;
12use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
13
/// Aggregated view of every metric family tracked for an optimizer session.
/// Each sub-collection is updated independently by the
/// `TransformerPerformanceTracker`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceMetrics {
    /// Epoch and meta-step statistics (loss, timing, convergence).
    pub training_metrics: TrainingMetricsCollection,
    /// Latency, throughput, and output-quality statistics for inference.
    pub inference_metrics: InferenceMetricsCollection,
    /// Peak/average memory usage and sample counters.
    pub memory_metrics: MemoryMetricsCollection,
    /// Step and per-operation wall-clock timings.
    pub timing_metrics: TimingMetricsCollection,
    /// Loss statistics, stability measures, and the alert log.
    pub quality_metrics: QualityMetricsCollection,
    /// CPU/memory/disk/network utilization histories.
    pub resource_metrics: ResourceMetricsCollection,
    /// Update-norm history and adaptive-learning counters.
    pub optimization_metrics: OptimizationMetricsCollection,
}
38
/// Central performance tracker for a transformer-based optimizer.
///
/// Collects bounded histories of losses, step timings, meta-learning
/// results, and memory samples; maintains sliding-window trend estimators;
/// and raises alerts when configured thresholds are crossed.
pub struct TransformerPerformanceTracker<T: Float + Debug + Send + Sync + 'static> {
    /// Tracker configuration (history bounds etc.).
    config: PerformanceConfig,
    /// Aggregated metric collections.
    metrics: PerformanceMetrics,
    /// Recent loss values, bounded by `config.max_history_size`.
    loss_history: VecDeque<f64>,
    /// Recent optimization-step durations, bounded by `config.max_history_size`.
    step_timings: VecDeque<Duration>,
    /// Recent meta-learning results (bounded history).
    meta_results_history: VecDeque<MetaLearningResult<T>>,
    /// Recent per-epoch training metrics, bounded by `config.max_history_size`.
    training_history: VecDeque<TrainingMetrics>,
    /// Recent timestamped memory samples, bounded by `config.max_history_size`.
    memory_samples: VecDeque<MemorySample>,
    /// Reference values set via `set_baselines`.
    baselines: PerformanceBaselines,
    /// Thresholds that trigger performance alerts.
    alert_thresholds: AlertThresholds,
    /// Sliding-window trend estimators (loss, convergence, memory, timing).
    trends: PerformanceTrends,
    /// Per-operation timing log filled by `profile_operation`.
    profiling_data: ProfilingData,
    /// Start of the current tracking session (reset by `reset`).
    session_start: Instant,
}
77
impl<T: Float + Debug + Send + Sync + 'static> Default for TransformerPerformanceTracker<T> {
    /// Equivalent to [`TransformerPerformanceTracker::new`].
    fn default() -> Self {
        Self::new()
    }
}
83
84impl<T: Float + Debug + Send + Sync + 'static> TransformerPerformanceTracker<T> {
85 pub fn new() -> Self {
87 let config = PerformanceConfig::default();
88
89 Self {
90 config,
91 metrics: PerformanceMetrics::new(),
92 loss_history: VecDeque::new(),
93 step_timings: VecDeque::new(),
94 meta_results_history: VecDeque::new(),
95 training_history: VecDeque::new(),
96 memory_samples: VecDeque::new(),
97 baselines: PerformanceBaselines::new(),
98 alert_thresholds: AlertThresholds::default(),
99 trends: PerformanceTrends::new(),
100 profiling_data: ProfilingData::new(),
101 session_start: Instant::now(),
102 }
103 }
104
105 pub fn with_config(config: PerformanceConfig) -> Self {
107 let mut tracker = Self::new();
108 tracker.config = config;
109 tracker
110 }
111
112 pub fn record_optimization_step(&mut self, duration: Duration, update: &Array1<T>) {
114 self.step_timings.push_back(duration);
116 if self.step_timings.len() > self.config.max_history_size {
117 self.step_timings.pop_front();
118 }
119
120 self.metrics.timing_metrics.record_step_time(duration);
122
123 let update_norm = self.compute_array_norm(update);
125 self.metrics
126 .optimization_metrics
127 .record_update_norm(update_norm);
128
129 self.trends.update_step_timing(duration);
131
132 self.check_performance_alerts(duration, update_norm);
134 }
135
136 pub fn record_training_epoch(&mut self, metrics: TrainingMetrics) {
138 self.training_history.push_back(metrics.clone());
139 if self.training_history.len() > self.config.max_history_size {
140 self.training_history.pop_front();
141 }
142
143 self.metrics.training_metrics.record_epoch(
145 metrics.loss,
146 metrics.training_time,
147 metrics.convergence_rate,
148 );
149
150 self.trends.update_training_loss(metrics.loss);
152 self.trends
153 .update_convergence_rate(metrics.convergence_rate);
154
155 self.check_convergence_alerts(&metrics);
157 }
158
159 pub fn record_meta_step(&mut self, result: MetaLearningResult<T>) {
161 self.meta_results_history.push_back(result.clone());
162 if self.meta_results_history.len() > 100 {
163 self.meta_results_history.pop_front();
164 }
165
166 self.metrics.training_metrics.record_meta_step(
168 result.meta_loss,
169 result.computation_time,
170 result.task_adaptations.len(),
171 );
172
173 self.trends.update_meta_loss(result.meta_loss);
175 self.trends
176 .update_adaptation_efficiency(result.task_adaptations.len() as f64);
177 }
178
179 pub fn record_inference(&mut self, input_size: usize, duration: Duration, output_quality: f64) {
181 self.metrics
182 .inference_metrics
183 .record_inference(input_size, duration, output_quality);
184
185 let throughput = input_size as f64 / duration.as_secs_f64();
187 self.metrics.inference_metrics.update_throughput(throughput);
188
189 self.trends.update_inference_latency(duration);
191 self.trends.update_inference_quality(output_quality);
192 }
193
194 pub fn record_memory_usage(&mut self, usage: MemoryUsage) {
196 let sample = MemorySample {
197 timestamp: std::time::SystemTime::now(),
198 usage,
199 };
200
201 self.memory_samples.push_back(sample.clone());
202 if self.memory_samples.len() > self.config.max_history_size {
203 self.memory_samples.pop_front();
204 }
205
206 self.metrics.memory_metrics.record_usage(usage);
208
209 self.trends.update_memory_usage(usage.total_memory);
211
212 self.check_memory_alerts(&usage);
214 }
215
216 pub fn record_loss(&mut self, loss: f64) {
218 self.loss_history.push_back(loss);
219 if self.loss_history.len() > self.config.max_history_size {
220 self.loss_history.pop_front();
221 }
222
223 self.metrics.quality_metrics.record_loss(loss);
225
226 self.trends.update_loss(loss);
228 }
229
230 pub fn profile_operation<F, R>(&mut self, operation_name: &str, operation: F) -> Result<R>
232 where
233 F: FnOnce() -> Result<R>,
234 {
235 let start_time = Instant::now();
236 let result = operation();
237 let duration = start_time.elapsed();
238
239 self.profiling_data
241 .record_operation(operation_name.to_string(), duration);
242
243 self.metrics
245 .timing_metrics
246 .record_operation_time(operation_name, duration);
247
248 result
249 }
250
251 pub fn generate_report(&self) -> PerformanceReport {
253 let session_duration = self.session_start.elapsed();
254
255 PerformanceReport {
256 session_duration,
257 total_optimization_steps: self.step_timings.len(),
258 total_training_epochs: self.training_history.len(),
259 total_meta_steps: self.meta_results_history.len(),
260
261 average_step_time: self.calculate_average_step_time(),
263 average_loss: self.calculate_average_loss(),
264 current_convergence_rate: self.calculate_current_convergence_rate(),
265
266 peak_memory_usage: self.calculate_peak_memory_usage(),
268 average_memory_usage: self.calculate_average_memory_usage(),
269
270 best_loss: self.calculate_best_loss(),
272 loss_improvement: self.calculate_loss_improvement(),
273
274 loss_trend: self.trends.get_loss_trend(),
276 convergence_trend: self.trends.get_convergence_trend(),
277 memory_trend: self.trends.get_memory_trend(),
278
279 performance_alerts: self.get_recent_alerts(),
281
282 cpu_utilization: self.metrics.resource_metrics.get_average_cpu_usage(),
284 memory_utilization: self.metrics.resource_metrics.get_average_memory_usage(),
285
286 quality_score: self.calculate_overall_quality_score(),
288
289 recommendations: self.generate_recommendations(),
291 }
292 }
293
294 pub fn get_loss_history(&self) -> &VecDeque<f64> {
296 &self.loss_history
297 }
298
299 pub fn get_metrics(&self) -> &PerformanceMetrics {
301 &self.metrics
302 }
303
304 pub fn get_trends(&self) -> &PerformanceTrends {
306 &self.trends
307 }
308
309 pub fn set_baselines(&mut self, baselines: PerformanceBaselines) {
311 self.baselines = baselines;
312 }
313
314 pub fn reset(&mut self) {
316 self.metrics = PerformanceMetrics::new();
317 self.loss_history.clear();
318 self.step_timings.clear();
319 self.meta_results_history.clear();
320 self.training_history.clear();
321 self.memory_samples.clear();
322 self.trends.reset();
323 self.profiling_data.reset();
324 self.session_start = Instant::now();
325 }
326
327 fn compute_array_norm(&self, array: &Array1<T>) -> f64 {
329 let sum_squares: T = array
330 .iter()
331 .map(|&x| x * x)
332 .fold(T::zero(), |acc, x| acc + x);
333 sum_squares.sqrt().to_f64().unwrap_or(0.0)
334 }
335
336 fn calculate_average_step_time(&self) -> Duration {
337 if self.step_timings.is_empty() {
338 Duration::new(0, 0)
339 } else {
340 let total: Duration = self.step_timings.iter().sum();
341 total / self.step_timings.len() as u32
342 }
343 }
344
345 fn calculate_average_loss(&self) -> f64 {
346 if self.loss_history.is_empty() {
347 0.0
348 } else {
349 self.loss_history.iter().sum::<f64>() / self.loss_history.len() as f64
350 }
351 }
352
353 fn calculate_current_convergence_rate(&self) -> f64 {
354 if let Some(recent_training) = self.training_history.back() {
355 recent_training.convergence_rate
356 } else {
357 0.0
358 }
359 }
360
361 fn calculate_peak_memory_usage(&self) -> usize {
362 self.memory_samples
363 .iter()
364 .map(|sample| sample.usage.total_memory)
365 .max()
366 .unwrap_or(0)
367 }
368
369 fn calculate_average_memory_usage(&self) -> f64 {
370 if self.memory_samples.is_empty() {
371 0.0
372 } else {
373 let total: usize = self
374 .memory_samples
375 .iter()
376 .map(|s| s.usage.total_memory)
377 .sum();
378 total as f64 / self.memory_samples.len() as f64
379 }
380 }
381
382 fn calculate_best_loss(&self) -> f64 {
383 self.loss_history
384 .iter()
385 .fold(f64::INFINITY, |min, &loss| min.min(loss))
386 }
387
388 fn calculate_loss_improvement(&self) -> f64 {
389 if self.loss_history.len() < 2 {
390 return 0.0;
391 }
392
393 let initial_loss = self.loss_history[0];
394 let final_loss = *self.loss_history.back().unwrap();
395
396 if initial_loss > 0.0 {
397 (initial_loss - final_loss) / initial_loss
398 } else {
399 0.0
400 }
401 }
402
403 fn calculate_overall_quality_score(&self) -> f64 {
404 let loss_score = 1.0 - (self.calculate_average_loss() / 10.0).min(1.0);
406 let convergence_score = self.calculate_current_convergence_rate();
407 let stability_score = 1.0 - self.trends.get_loss_volatility().min(1.0);
408
409 (loss_score * 0.4 + convergence_score * 0.3 + stability_score * 0.3).clamp(0.0, 1.0)
410 }
411
412 fn generate_recommendations(&self) -> Vec<String> {
413 let mut recommendations = Vec::new();
414
415 if self.trends.get_loss_trend() > 0.0 {
417 recommendations
418 .push("Loss is increasing. Consider reducing learning rate.".to_string());
419 }
420
421 if self.calculate_peak_memory_usage() > 1024 * 1024 * 1024 {
423 recommendations.push(
425 "High memory usage detected. Consider enabling memory compression.".to_string(),
426 );
427 }
428
429 if self.calculate_average_step_time() > Duration::from_millis(100) {
431 recommendations.push(
432 "Slow optimization steps. Consider reducing model size or batch size.".to_string(),
433 );
434 }
435
436 if self.calculate_current_convergence_rate() < 0.1 {
438 recommendations.push(
439 "Low convergence rate. Consider adjusting meta-learning parameters.".to_string(),
440 );
441 }
442
443 recommendations
444 }
445
446 fn check_performance_alerts(&mut self, duration: Duration, update_norm: f64) {
447 if duration > self.alert_thresholds.max_step_time {
449 self.record_alert(AlertType::SlowStep, format!("Step took {:?}", duration));
450 }
451
452 if update_norm > self.alert_thresholds.max_gradient_norm {
454 self.record_alert(
455 AlertType::GradientExplosion,
456 format!("Update norm: {:.6}", update_norm),
457 );
458 }
459
460 if update_norm < self.alert_thresholds.min_gradient_norm {
462 self.record_alert(
463 AlertType::GradientVanishing,
464 format!("Update norm: {:.6}", update_norm),
465 );
466 }
467 }
468
469 fn check_convergence_alerts(&mut self, metrics: &TrainingMetrics) {
470 if metrics.convergence_rate < self.alert_thresholds.min_convergence_rate {
472 self.record_alert(
473 AlertType::TrainingStagnation,
474 format!("Convergence rate: {:.6}", metrics.convergence_rate),
475 );
476 }
477
478 if let Some(previous) = self.training_history.iter().rev().nth(1) {
480 if metrics.loss > previous.loss * 1.1 {
481 self.record_alert(
483 AlertType::LossIncrease,
484 format!(
485 "Loss increased from {:.6} to {:.6}",
486 previous.loss, metrics.loss
487 ),
488 );
489 }
490 }
491 }
492
493 fn check_memory_alerts(&mut self, usage: &MemoryUsage) {
494 if usage.total_memory > self.alert_thresholds.max_memory_usage {
496 self.record_alert(
497 AlertType::HighMemoryUsage,
498 format!("Memory usage: {} bytes", usage.total_memory),
499 );
500 }
501
502 if let Some(previous) = self.memory_samples.back() {
504 let memory_increase = usage
505 .total_memory
506 .saturating_sub(previous.usage.total_memory);
507 if memory_increase > 1024 * 1024 * 100 {
508 self.record_alert(
510 AlertType::PossibleMemoryLeak,
511 format!("Memory increased by {} bytes", memory_increase),
512 );
513 }
514 }
515 }
516
517 fn record_alert(&mut self, alert_type: AlertType, message: String) {
518 let alert = PerformanceAlert {
519 alert_type,
520 message,
521 timestamp: std::time::SystemTime::now(),
522 };
523
524 self.metrics.quality_metrics.record_alert(alert);
525 }
526
527 fn get_recent_alerts(&self) -> Vec<PerformanceAlert> {
528 self.metrics.quality_metrics.get_recent_alerts(10)
529 }
530}
531
/// Cumulative statistics for training epochs and meta-learning steps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingMetricsCollection {
    /// Number of epochs recorded.
    pub total_epochs: usize,
    /// Sum of all epoch durations.
    pub total_training_time: Duration,
    /// Running mean of epoch losses.
    pub average_loss: f64,
    /// Lowest loss seen across epochs and meta-steps.
    pub best_loss: f64,
    /// Convergence rate of the most recent epoch.
    pub convergence_rate: f64,
    /// Number of meta-learning steps recorded.
    pub meta_steps: usize,
    /// Total task adaptations across all meta-steps.
    pub task_adaptations: usize,
}

/// Cumulative statistics for inference calls.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceMetricsCollection {
    /// Number of inferences recorded.
    pub total_inferences: usize,
    /// Running mean inference latency.
    pub average_latency: Duration,
    /// Highest observed throughput (items per second).
    pub peak_throughput: f64,
    /// Running mean output-quality score.
    pub average_quality: f64,
    /// Cache hit rate; not updated anywhere in this file.
    pub cache_hit_rate: f64,
}

/// Cumulative memory-usage statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryMetricsCollection {
    /// Highest total memory observed, in bytes.
    pub peak_usage: usize,
    /// Running mean of total memory, in bytes.
    pub average_usage: f64,
    /// Number of usage samples recorded.
    pub total_allocations: usize,
    /// Compression ratio (1.0 = uncompressed); not updated in this file.
    pub compression_ratio: f64,
    /// Fragmentation rate; not updated in this file.
    pub fragmentation_rate: f64,
}

/// Wall-clock timing statistics for steps and named operations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimingMetricsCollection {
    /// Mean step duration; not updated in this file (the tracker computes
    /// averages from its own step history instead).
    pub average_step_time: Duration,
    /// Sum of all recorded step durations.
    pub total_computation_time: Duration,
    /// Most recent duration per named operation.
    pub operation_timings: HashMap<String, Duration>,
    /// Estimated profiling overhead; not updated in this file.
    pub profiling_overhead: Duration,
}

/// Quality-related statistics and the alert log.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetricsCollection {
    /// Min/max/mean/std-dev of recorded losses.
    pub loss_statistics: LossStatistics,
    /// Convergence-rate statistics.
    pub convergence_statistics: ConvergenceStatistics,
    /// Volatility / stability measures.
    pub stability_metrics: StabilityMetrics,
    /// Bounded log of raised alerts (newest at the back).
    pub performance_alerts: VecDeque<PerformanceAlert>,
}

/// System-resource utilization histories.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourceMetricsCollection {
    /// CPU utilization samples; not populated in this file.
    pub cpu_usage_history: VecDeque<f64>,
    /// Memory utilization samples; not populated in this file.
    pub memory_usage_history: VecDeque<f64>,
    /// Disk read/write counters.
    pub disk_io_metrics: DiskIOMetrics,
    /// Network request/byte counters.
    pub network_metrics: NetworkMetrics,
}

/// Optimizer-update statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationMetricsCollection {
    /// Recent update L2 norms (bounded to 1000 entries).
    pub update_norm_history: VecDeque<f64>,
    /// Rate of parameter change; not updated in this file.
    pub parameter_change_rate: f64,
    /// Optimization efficiency score; not updated in this file.
    pub optimization_efficiency: f64,
    /// Counters for adaptive-learning adjustments.
    pub adaptive_learning_metrics: AdaptiveLearningMetrics,
}
593
/// A memory-usage reading paired with the wall-clock time it was taken.
#[derive(Debug, Clone)]
pub struct MemorySample {
    /// When the sample was taken.
    pub timestamp: std::time::SystemTime,
    /// The usage reading itself.
    pub usage: MemoryUsage,
}

/// Breakdown of memory consumption; all values are in bytes.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct MemoryUsage {
    /// Total memory in use.
    pub total_memory: usize,
    /// Memory attributed to the model.
    pub model_memory: usize,
    /// Memory held by caches.
    pub cache_memory: usize,
    /// Short-lived scratch memory.
    pub temporary_memory: usize,
}

/// Reference values a session can be compared against (set via
/// `TransformerPerformanceTracker::set_baselines`).
#[derive(Debug, Clone)]
pub struct PerformanceBaselines {
    /// Reference loss (`f64::INFINITY` means "no baseline yet").
    pub baseline_loss: f64,
    /// Reference step duration.
    pub baseline_step_time: Duration,
    /// Reference memory footprint in bytes.
    pub baseline_memory_usage: usize,
    /// Reference convergence rate.
    pub baseline_convergence_rate: f64,
}
616
impl Default for PerformanceBaselines {
    /// Equivalent to [`PerformanceBaselines::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl PerformanceBaselines {
    /// Creates "empty" baselines: an infinite loss and zeroed time, memory,
    /// and convergence references.
    pub fn new() -> Self {
        Self {
            baseline_loss: f64::INFINITY,
            baseline_step_time: Duration::new(0, 0),
            baseline_memory_usage: 0,
            baseline_convergence_rate: 0.0,
        }
    }
}
633
/// Limits that, when crossed, cause the tracker to raise alerts.
#[derive(Debug, Clone)]
pub struct AlertThresholds {
    /// Longest acceptable optimization-step duration.
    pub max_step_time: Duration,
    /// Lowest acceptable convergence rate before a stagnation alert.
    pub min_convergence_rate: f64,
    /// Memory ceiling in bytes before a high-memory alert.
    pub max_memory_usage: usize,
    /// Update-norm ceiling before a gradient-explosion alert.
    pub max_gradient_norm: f64,
    /// Update-norm floor before a gradient-vanishing alert.
    pub min_gradient_norm: f64,
}
642
643impl Default for AlertThresholds {
644 fn default() -> Self {
645 Self {
646 max_step_time: Duration::from_secs(1),
647 min_convergence_rate: 0.01,
648 max_memory_usage: 2 * 1024 * 1024 * 1024, max_gradient_norm: 10.0,
650 min_gradient_norm: 1e-8,
651 }
652 }
653}
654
/// Bundle of sliding-window trend estimators maintained by the tracker.
#[derive(Debug, Clone)]
pub struct PerformanceTrends {
    // Loss values (raw, training, and meta losses all feed this window).
    loss_trend: TrendAnalyzer,
    // Convergence rates and adaptation-efficiency values.
    convergence_trend: TrendAnalyzer,
    // Total memory usage in bytes, stored as f64.
    memory_trend: TrendAnalyzer,
    // Step and inference durations, in seconds.
    timing_trend: TrendAnalyzer,
}
662
impl Default for PerformanceTrends {
    /// Equivalent to [`PerformanceTrends::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl PerformanceTrends {
    /// Creates estimators with fixed window sizes: 100 samples for loss,
    /// memory, and timing; 50 for convergence.
    pub fn new() -> Self {
        Self {
            loss_trend: TrendAnalyzer::new(100),
            convergence_trend: TrendAnalyzer::new(50),
            memory_trend: TrendAnalyzer::new(100),
            timing_trend: TrendAnalyzer::new(100),
        }
    }

    /// Feeds a raw loss value into the shared loss window.
    pub fn update_loss(&mut self, loss: f64) {
        self.loss_trend.add_sample(loss);
    }

    /// Feeds a training-epoch loss into the shared loss window.
    pub fn update_training_loss(&mut self, loss: f64) {
        self.loss_trend.add_sample(loss);
    }

    /// Feeds a meta-learning loss into the shared loss window.
    pub fn update_meta_loss(&mut self, loss: f64) {
        self.loss_trend.add_sample(loss);
    }

    /// Feeds a convergence rate into the convergence window.
    pub fn update_convergence_rate(&mut self, rate: f64) {
        self.convergence_trend.add_sample(rate);
    }

    /// Feeds an adaptation count into the convergence window.
    /// NOTE(review): this mixes adaptation counts with convergence rates in
    /// the same window — confirm that is intentional.
    pub fn update_adaptation_efficiency(&mut self, efficiency: f64) {
        self.convergence_trend.add_sample(efficiency);
    }

    /// Feeds a memory reading (bytes) into the memory window.
    pub fn update_memory_usage(&mut self, usage: usize) {
        self.memory_trend.add_sample(usage as f64);
    }

    /// Feeds a step duration (seconds) into the timing window.
    pub fn update_step_timing(&mut self, duration: Duration) {
        self.timing_trend.add_sample(duration.as_secs_f64());
    }

    /// Feeds an inference latency (seconds) into the timing window.
    pub fn update_inference_latency(&mut self, duration: Duration) {
        self.timing_trend.add_sample(duration.as_secs_f64());
    }

    /// Currently a no-op; inference quality is not tracked as a trend.
    pub fn update_inference_quality(&mut self, _quality: f64) {
    }

    /// Slope of the loss window (positive = loss rising).
    pub fn get_loss_trend(&self) -> f64 {
        self.loss_trend.get_trend()
    }

    /// Slope of the convergence window.
    pub fn get_convergence_trend(&self) -> f64 {
        self.convergence_trend.get_trend()
    }

    /// Slope of the memory window.
    pub fn get_memory_trend(&self) -> f64 {
        self.memory_trend.get_trend()
    }

    /// Standard deviation of the loss window.
    pub fn get_loss_volatility(&self) -> f64 {
        self.loss_trend.get_volatility()
    }

    /// Clears all four windows.
    pub fn reset(&mut self) {
        self.loss_trend.reset();
        self.convergence_trend.reset();
        self.memory_trend.reset();
        self.timing_trend.reset();
    }
}
738
/// Sliding-window trend estimator.
///
/// Retains at most `max_samples` recent values and reports the slope of a
/// simple least-squares regression over them (`get_trend`) as well as their
/// population standard deviation (`get_volatility`).
#[derive(Debug, Clone)]
pub struct TrendAnalyzer {
    samples: VecDeque<f64>,
    max_samples: usize,
}

impl TrendAnalyzer {
    /// Creates an analyzer that keeps the most recent `max_samples` values.
    pub fn new(max_samples: usize) -> Self {
        Self {
            samples: VecDeque::new(),
            max_samples,
        }
    }

    /// Appends a value, evicting the oldest one once the window is full.
    pub fn add_sample(&mut self, value: f64) {
        self.samples.push_back(value);
        while self.samples.len() > self.max_samples {
            self.samples.pop_front();
        }
    }

    /// Least-squares slope of the samples against their window index.
    /// Returns 0.0 with fewer than two samples or a degenerate fit.
    pub fn get_trend(&self) -> f64 {
        if self.samples.len() < 2 {
            return 0.0;
        }

        let n = self.samples.len() as f64;
        let (mut x_sum, mut y_sum, mut xy_sum, mut x_sq_sum) = (0.0, 0.0, 0.0, 0.0);
        for (i, &y) in self.samples.iter().enumerate() {
            let x = i as f64;
            x_sum += x;
            y_sum += y;
            xy_sum += x * y;
            x_sq_sum += x * x;
        }

        let denominator = n * x_sq_sum - x_sum * x_sum;
        if denominator.abs() < 1e-10 {
            0.0
        } else {
            (n * xy_sum - x_sum * y_sum) / denominator
        }
    }

    /// Population standard deviation of the window; 0.0 with fewer than
    /// two samples.
    pub fn get_volatility(&self) -> f64 {
        if self.samples.len() < 2 {
            return 0.0;
        }

        let count = self.samples.len() as f64;
        let mean = self.samples.iter().sum::<f64>() / count;
        let variance = self
            .samples
            .iter()
            .map(|&x| (x - mean).powi(2))
            .sum::<f64>()
            / count;
        variance.sqrt()
    }

    /// Discards all retained samples.
    pub fn reset(&mut self) {
        self.samples.clear();
    }
}
807
/// Per-operation wall-clock timing log populated by `profile_operation`.
#[derive(Debug, Clone)]
pub struct ProfilingData {
    operation_timings: HashMap<String, VecDeque<Duration>>,
    total_operations: usize,
}

impl Default for ProfilingData {
    /// Equivalent to [`ProfilingData::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl ProfilingData {
    /// Creates an empty profiling log.
    pub fn new() -> Self {
        Self {
            operation_timings: HashMap::new(),
            total_operations: 0,
        }
    }

    /// Appends a duration under `operation`, keeping at most the 1000 most
    /// recent entries per operation name.
    pub fn record_operation(&mut self, operation: String, duration: Duration) {
        self.total_operations += 1;
        let entry = self.operation_timings.entry(operation).or_default();
        entry.push_back(duration);
        while entry.len() > 1000 {
            entry.pop_front();
        }
    }

    /// Mean recorded duration for `operation`, or `None` if that operation
    /// was never recorded.
    pub fn get_average_time(&self, operation: &str) -> Option<Duration> {
        let timings = self.operation_timings.get(operation)?;
        let total: Duration = timings.iter().sum();
        Some(total / timings.len() as u32)
    }

    /// Discards all recorded timings and resets the operation counter.
    pub fn reset(&mut self) {
        self.operation_timings.clear();
        self.total_operations = 0;
    }
}
849
/// A single threshold-violation event raised by the tracker.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceAlert {
    /// Which condition was violated.
    pub alert_type: AlertType,
    /// Human-readable detail (e.g. the offending value).
    pub message: String,
    /// When the alert was raised.
    pub timestamp: std::time::SystemTime,
}

/// Categories of performance alerts the tracker can raise.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertType {
    /// An optimization step exceeded `max_step_time`.
    SlowStep,
    /// Update norm above `max_gradient_norm`.
    GradientExplosion,
    /// Update norm below `min_gradient_norm`.
    GradientVanishing,
    /// Convergence rate below `min_convergence_rate`.
    TrainingStagnation,
    /// Epoch loss rose more than 10% over the previous epoch.
    LossIncrease,
    /// Total memory above `max_memory_usage`.
    HighMemoryUsage,
    /// Memory grew sharply between consecutive samples.
    PossibleMemoryLeak,
    /// Reserved; not raised anywhere in this file.
    ConvergenceFailure,
}

/// Point-in-time session summary produced by `generate_report`.
#[derive(Debug, Clone)]
pub struct PerformanceReport {
    /// Time since the session started (or was last reset).
    pub session_duration: Duration,
    /// Number of retained optimization-step timings.
    pub total_optimization_steps: usize,
    /// Number of retained training epochs.
    pub total_training_epochs: usize,
    /// Number of retained meta-learning steps.
    pub total_meta_steps: usize,
    /// Mean optimization-step duration.
    pub average_step_time: Duration,
    /// Mean recorded loss.
    pub average_loss: f64,
    /// Convergence rate of the most recent epoch.
    pub current_convergence_rate: f64,
    /// Highest observed total memory, in bytes.
    pub peak_memory_usage: usize,
    /// Mean total memory across samples, in bytes.
    pub average_memory_usage: f64,
    /// Lowest recorded loss (`f64::INFINITY` if none recorded).
    pub best_loss: f64,
    /// Relative loss improvement from first to last retained loss.
    pub loss_improvement: f64,
    /// Slope of the loss trend window.
    pub loss_trend: f64,
    /// Slope of the convergence trend window.
    pub convergence_trend: f64,
    /// Slope of the memory trend window.
    pub memory_trend: f64,
    /// Up to the ten most recent alerts, newest first.
    pub performance_alerts: Vec<PerformanceAlert>,
    /// Mean of recorded CPU-utilization samples.
    pub cpu_utilization: f64,
    /// Mean of recorded memory-utilization samples.
    pub memory_utilization: f64,
    /// Heuristic overall quality score in [0, 1].
    pub quality_score: f64,
    /// Human-readable tuning suggestions.
    pub recommendations: Vec<String>,
}
893
/// Summary statistics over recorded losses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LossStatistics {
    /// Mean loss; not updated anywhere in this file.
    pub mean: f64,
    /// Loss standard deviation; not updated anywhere in this file.
    pub std_dev: f64,
    /// Minimum recorded loss.
    pub min: f64,
    /// Maximum recorded loss.
    pub max: f64,
}

/// Summary statistics over convergence rates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConvergenceStatistics {
    /// Mean convergence rate; not updated in this file.
    pub average_rate: f64,
    /// Best convergence rate; not updated in this file.
    pub best_rate: f64,
    /// Number of convergence episodes; not updated in this file.
    pub convergence_episodes: usize,
}

/// Stability measures for training dynamics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StabilityMetrics {
    /// Loss volatility; not updated in this file.
    pub loss_volatility: f64,
    /// Gradient stability; not updated in this file.
    pub gradient_stability: f64,
    /// Convergence stability; not updated in this file.
    pub convergence_stability: f64,
}

/// Disk I/O counters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiskIOMetrics {
    /// Number of read operations.
    pub total_reads: usize,
    /// Number of write operations.
    pub total_writes: usize,
    /// Bytes read.
    pub total_bytes_read: usize,
    /// Bytes written.
    pub total_bytes_written: usize,
}

/// Network I/O counters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkMetrics {
    /// Number of requests issued.
    pub total_requests: usize,
    /// Bytes sent.
    pub total_bytes_sent: usize,
    /// Bytes received.
    pub total_bytes_received: usize,
    /// Mean request latency.
    pub average_latency: Duration,
}

/// Counters for adaptive-learning adjustments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdaptiveLearningMetrics {
    /// Learning-rate adjustments made.
    pub learning_rate_adjustments: usize,
    /// Batch-size adjustments made.
    pub batch_size_adjustments: usize,
    /// Architecture modifications made.
    pub architecture_modifications: usize,
}
939
impl Default for PerformanceMetrics {
    /// Equivalent to [`PerformanceMetrics::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl PerformanceMetrics {
    /// Creates a metrics bundle with every sub-collection zero-initialized.
    pub fn new() -> Self {
        Self {
            training_metrics: TrainingMetricsCollection::new(),
            inference_metrics: InferenceMetricsCollection::new(),
            memory_metrics: MemoryMetricsCollection::new(),
            timing_metrics: TimingMetricsCollection::new(),
            quality_metrics: QualityMetricsCollection::new(),
            resource_metrics: ResourceMetricsCollection::new(),
            optimization_metrics: OptimizationMetricsCollection::new(),
        }
    }
}
960
impl Default for TrainingMetricsCollection {
    /// Equivalent to [`TrainingMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl TrainingMetricsCollection {
    /// Creates zeroed statistics (`best_loss` starts at infinity so the
    /// first recorded loss always becomes the best).
    pub fn new() -> Self {
        Self {
            total_epochs: 0,
            total_training_time: Duration::new(0, 0),
            average_loss: 0.0,
            best_loss: f64::INFINITY,
            convergence_rate: 0.0,
            meta_steps: 0,
            task_adaptations: 0,
        }
    }

    /// Folds one epoch into the running statistics using an incremental
    /// mean: new_avg = (old_avg * (n - 1) + loss) / n.
    pub fn record_epoch(&mut self, loss: f64, duration: Duration, convergence: f64) {
        self.total_epochs += 1;
        self.total_training_time += duration;
        self.average_loss =
            (self.average_loss * (self.total_epochs - 1) as f64 + loss) / self.total_epochs as f64;
        self.best_loss = self.best_loss.min(loss);
        // The most recent epoch's rate overwrites the previous one.
        self.convergence_rate = convergence;
    }

    /// Folds one meta-learning step into the statistics. The duration
    /// argument is currently unused.
    pub fn record_meta_step(&mut self, loss: f64, _duration: Duration, adaptations: usize) {
        self.meta_steps += 1;
        self.task_adaptations += adaptations;
        self.best_loss = self.best_loss.min(loss);
    }
}
995
impl Default for InferenceMetricsCollection {
    /// Equivalent to [`InferenceMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl InferenceMetricsCollection {
    /// Creates zeroed inference statistics.
    pub fn new() -> Self {
        Self {
            total_inferences: 0,
            average_latency: Duration::new(0, 0),
            peak_throughput: 0.0,
            average_quality: 0.0,
            cache_hit_rate: 0.0,
        }
    }

    /// Folds one inference into the running latency and quality means.
    /// The input size is unused here — throughput is computed by the caller.
    pub fn record_inference(&mut self, _input_size: usize, duration: Duration, quality: f64) {
        self.total_inferences += 1;
        // Incremental means; the u32 casts assume fewer than 2^32 inferences.
        self.average_latency = (self.average_latency * (self.total_inferences - 1) as u32
            + duration)
            / self.total_inferences as u32;
        self.average_quality = (self.average_quality * (self.total_inferences - 1) as f64
            + quality)
            / self.total_inferences as f64;
    }

    /// Records a throughput observation, keeping the maximum seen.
    pub fn update_throughput(&mut self, throughput: f64) {
        self.peak_throughput = self.peak_throughput.max(throughput);
    }
}
1027
impl Default for MemoryMetricsCollection {
    /// Equivalent to [`MemoryMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl MemoryMetricsCollection {
    /// Creates zeroed memory statistics (compression ratio starts at 1.0,
    /// i.e. uncompressed).
    pub fn new() -> Self {
        Self {
            peak_usage: 0,
            average_usage: 0.0,
            total_allocations: 0,
            compression_ratio: 1.0,
            fragmentation_rate: 0.0,
        }
    }

    /// Folds one usage sample into the peak and the incremental mean.
    pub fn record_usage(&mut self, usage: MemoryUsage) {
        self.peak_usage = self.peak_usage.max(usage.total_memory);
        self.total_allocations += 1;
        self.average_usage = (self.average_usage * (self.total_allocations - 1) as f64
            + usage.total_memory as f64)
            / self.total_allocations as f64;
    }
}
1053
impl Default for TimingMetricsCollection {
    /// Equivalent to [`TimingMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl TimingMetricsCollection {
    /// Creates zeroed timing statistics.
    pub fn new() -> Self {
        Self {
            average_step_time: Duration::new(0, 0),
            total_computation_time: Duration::new(0, 0),
            operation_timings: HashMap::new(),
            profiling_overhead: Duration::new(0, 0),
        }
    }

    /// Adds a step duration to the cumulative computation time.
    /// NOTE(review): `average_step_time` is not maintained here — the
    /// tracker computes averages from its own step history instead.
    pub fn record_step_time(&mut self, duration: Duration) {
        self.total_computation_time += duration;
    }

    /// Stores the latest duration for a named operation, overwriting any
    /// previous entry for the same name.
    pub fn record_operation_time(&mut self, operation: &str, duration: Duration) {
        self.operation_timings
            .insert(operation.to_string(), duration);
    }
}
1080
impl Default for QualityMetricsCollection {
    /// Equivalent to [`QualityMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl QualityMetricsCollection {
    /// Creates empty quality statistics; loss min/max start at +/- infinity
    /// so the first recorded loss initializes both.
    pub fn new() -> Self {
        Self {
            loss_statistics: LossStatistics {
                mean: 0.0,
                std_dev: 0.0,
                min: f64::INFINITY,
                max: f64::NEG_INFINITY,
            },
            convergence_statistics: ConvergenceStatistics {
                average_rate: 0.0,
                best_rate: 0.0,
                convergence_episodes: 0,
            },
            stability_metrics: StabilityMetrics {
                loss_volatility: 0.0,
                gradient_stability: 0.0,
                convergence_stability: 0.0,
            },
            performance_alerts: VecDeque::new(),
        }
    }

    /// Updates the loss min/max. NOTE(review): `mean` and `std_dev` are not
    /// maintained here (there is no sample count to update them with).
    pub fn record_loss(&mut self, loss: f64) {
        self.loss_statistics.min = self.loss_statistics.min.min(loss);
        self.loss_statistics.max = self.loss_statistics.max.max(loss);
    }

    /// Appends an alert, keeping at most the 100 most recent.
    pub fn record_alert(&mut self, alert: PerformanceAlert) {
        self.performance_alerts.push_back(alert);
        if self.performance_alerts.len() > 100 {
            self.performance_alerts.pop_front();
        }
    }

    /// Returns up to `count` most recent alerts, newest first.
    pub fn get_recent_alerts(&self, count: usize) -> Vec<PerformanceAlert> {
        self.performance_alerts
            .iter()
            .rev()
            .take(count)
            .cloned()
            .collect()
    }
}
1131
impl Default for ResourceMetricsCollection {
    /// Equivalent to [`ResourceMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl ResourceMetricsCollection {
    /// Creates empty resource statistics.
    pub fn new() -> Self {
        Self {
            cpu_usage_history: VecDeque::new(),
            memory_usage_history: VecDeque::new(),
            disk_io_metrics: DiskIOMetrics {
                total_reads: 0,
                total_writes: 0,
                total_bytes_read: 0,
                total_bytes_written: 0,
            },
            network_metrics: NetworkMetrics {
                total_requests: 0,
                total_bytes_sent: 0,
                total_bytes_received: 0,
                average_latency: Duration::new(0, 0),
            },
        }
    }

    /// Mean of the CPU-usage samples; 0.0 when none recorded.
    /// NOTE(review): nothing in this file pushes CPU samples, so this
    /// returns 0.0 unless another module populates the history.
    pub fn get_average_cpu_usage(&self) -> f64 {
        if self.cpu_usage_history.is_empty() {
            0.0
        } else {
            self.cpu_usage_history.iter().sum::<f64>() / self.cpu_usage_history.len() as f64
        }
    }

    /// Mean of the memory-usage samples; 0.0 when none recorded.
    pub fn get_average_memory_usage(&self) -> f64 {
        if self.memory_usage_history.is_empty() {
            0.0
        } else {
            self.memory_usage_history.iter().sum::<f64>() / self.memory_usage_history.len() as f64
        }
    }
}
1174
impl Default for OptimizationMetricsCollection {
    /// Equivalent to [`OptimizationMetricsCollection::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl OptimizationMetricsCollection {
    /// Creates zeroed optimization statistics.
    pub fn new() -> Self {
        Self {
            update_norm_history: VecDeque::new(),
            parameter_change_rate: 0.0,
            optimization_efficiency: 0.0,
            adaptive_learning_metrics: AdaptiveLearningMetrics {
                learning_rate_adjustments: 0,
                batch_size_adjustments: 0,
                architecture_modifications: 0,
            },
        }
    }

    /// Appends an update norm, keeping at most the 1000 most recent.
    pub fn record_update_norm(&mut self, norm: f64) {
        self.update_norm_history.push_back(norm);
        if self.update_norm_history.len() > 1000 {
            self.update_norm_history.pop_front();
        }
    }
}
1202
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh tracker starts with empty histories.
    #[test]
    fn test_performance_tracker_creation() {
        let tracker = TransformerPerformanceTracker::<f32>::new();
        assert_eq!(tracker.loss_history.len(), 0);
        assert_eq!(tracker.step_timings.len(), 0);
    }

    /// Losses accumulate and the best (lowest) one is reported.
    #[test]
    fn test_record_loss() {
        let mut tracker = TransformerPerformanceTracker::<f32>::new();
        tracker.record_loss(1.5);
        tracker.record_loss(1.2);
        tracker.record_loss(0.9);

        assert_eq!(tracker.loss_history.len(), 3);
        assert_eq!(tracker.calculate_best_loss(), 0.9);
    }

    /// A strictly increasing series yields a positive regression slope.
    #[test]
    fn test_trend_analyzer() {
        let mut analyzer = TrendAnalyzer::new(10);

        for i in 0..5 {
            analyzer.add_sample(i as f64);
        }

        let trend = analyzer.get_trend();
        assert!(trend > 0.0);
    }

    /// Decreasing losses appear in the report as a positive improvement.
    #[test]
    fn test_performance_report_generation() {
        let mut tracker = TransformerPerformanceTracker::<f32>::new();
        tracker.record_loss(2.0);
        tracker.record_loss(1.5);
        tracker.record_loss(1.0);

        let report = tracker.generate_report();
        assert!(report.loss_improvement > 0.0);
        assert_eq!(report.best_loss, 1.0);
    }

    /// `profile_operation` passes the closure's result through unchanged.
    #[test]
    fn test_profiling() {
        let mut tracker = TransformerPerformanceTracker::<f32>::new();

        let result = tracker.profile_operation("test_op", || {
            std::thread::sleep(Duration::from_millis(10));
            Ok(42)
        });

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), 42);
    }
}
1261}