1use crate::error::{OptimError, Result};
7use crate::optimizers::*;
8use crate::unified_api::OptimizerConfig;
9use chrono::{DateTime, Utc};
10use scirs2_core::ndarray::{Array1, Array2};
11use scirs2_core::numeric::Float;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
/// A tracked experiment: its setup, recorded run results, and bookkeeping.
///
/// Built with [`Experiment::new`] plus the builder-style setters, driven
/// through its lifecycle with `start` / `complete`, and summarised with
/// `generate_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Experiment {
    /// Identifier; `Experiment::new` fills it with a freshly generated v4 UUID string.
    pub id: String,
    /// Name passed to `Experiment::new`; used as the report title.
    pub name: String,
    /// Hypothesis text; printed in the report header.
    pub hypothesis: String,
    /// Description text; the report includes it only when it is non-empty.
    pub description: String,
    /// Current lifecycle state (see [`ExperimentStatus`]).
    pub status: ExperimentStatus,
    /// Run configuration; `Experiment::new` uses `ExperimentConfig::default()`.
    pub config: ExperimentConfig,
    /// Optimizer configurations keyed by the name given to `add_optimizer_config`.
    pub optimizer_configs: HashMap<String, OptimizerConfig<f64>>,
    /// Dataset description; the report prints its name, sample count and feature count.
    pub dataset_info: DatasetInfo,
    /// Metric names; the report looks each one up in the `final_metrics` of successful runs.
    pub metrics: Vec<String>,
    /// Recorded run results; the report groups them by `optimizer_name`.
    pub results: Vec<ExperimentResult>,
    /// Reproducibility record (environment, versions, checklist).
    pub reproducibility: ReproducibilityInfo,
    /// Timestamps maintained by `new`, `start` and `complete`.
    pub timeline: ExperimentTimeline,
    /// Notes appended by `add_note`; listed at the end of the report.
    pub notes: Vec<ExperimentNote>,
    /// Descriptive metadata (tags, research question, references, ...).
    pub metadata: ExperimentMetadata,
}
47
/// Lifecycle state of an [`Experiment`].
///
/// Among the `Experiment` methods in this file, only `Planning`, `Ready`,
/// `Running` and `Completed` are read or assigned. The other variants can be
/// set by callers directly, since `Experiment::status` is a public field.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ExperimentStatus {
    /// Initial state assigned by `Experiment::new`; `start` accepts it.
    Planning,
    /// Accepted by `start` as a valid starting state.
    Ready,
    /// Assigned by `start`; the only state from which `complete` succeeds.
    Running,
    /// Assigned by `complete`.
    Completed,
    /// The experiment failed.
    Failed,
    /// The experiment is paused (`start` does not accept this state).
    Paused,
    /// The experiment was cancelled.
    Cancelled,
    /// Results are being analysed.
    Analyzing,
    /// Results have been published.
    Published,
}
70
/// Run-level configuration of an experiment.
///
/// The defaults quoted below come from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentConfig {
    /// Random seed; defaults to 42.
    pub random_seed: u64,
    /// Number of runs; defaults to 1.
    pub num_runs: usize,
    /// Maximum number of epochs; defaults to 100.
    pub max_epochs: usize,
    /// Optional early-stopping settings; defaults to `None`.
    pub early_stopping: Option<EarlyStoppingConfig>,
    /// Hardware description; defaults to `HardwareConfig::default()`.
    pub hardware_config: HardwareConfig,
    /// Free-form string key/value pairs describing the environment; empty by default.
    pub environment: HashMap<String, String>,
    /// Validation split; defaults to 0.2 (presumably a fraction of the data — confirm).
    pub validation_split: f64,
    /// Test split; defaults to 0.1 (presumably a fraction of the data — confirm).
    pub test_split: f64,
    /// Optional number of cross-validation folds; defaults to `None`.
    pub cv_folds: Option<usize>,
}
93
/// Early-stopping settings carried by [`ExperimentConfig`].
///
/// Nothing in this file consumes these values; the field notes below are
/// based on the names only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EarlyStoppingConfig {
    /// Name of the metric to watch (presumably a key of the run's metric maps — confirm).
    pub monitor_metric: String,
    /// Patience count; the unit (epochs vs. steps) is not established in this file.
    pub patience: usize,
    /// Minimum change that counts as an improvement.
    pub min_improvement: f64,
    /// Whether the monitored metric should be minimised or maximised.
    pub mode: OptimizationMode,
}
106
/// Direction in which a monitored metric counts as improving.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum OptimizationMode {
    /// Lower metric values are better.
    Minimize,
    /// Higher metric values are better.
    Maximize,
}
115
/// Description of the hardware an experiment runs on.
///
/// `Default` is derived, so each field takes its own type's default
/// (`gpu_info` is `None`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct HardwareConfig {
    /// CPU description.
    pub cpu_info: CpuInfo,
    /// GPU description, if any.
    pub gpu_info: Option<GpuInfo>,
    /// Memory sizing and allocation settings.
    pub memory_config: MemoryConfig,
    /// Threading / parallelism settings.
    pub parallel_config: ParallelConfig,
}
128
/// CPU description.
///
/// The defaults quoted below come from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpuInfo {
    /// Model name; defaults to `"Unknown"`.
    pub model: String,
    /// Core count; defaults to `std::thread::available_parallelism()` (1 if unavailable).
    pub cores: usize,
    /// Thread count; the default uses the same `available_parallelism()` query as `cores`.
    pub threads: usize,
    /// Clock frequency in MHz; defaults to 0.
    pub frequency_mhz: u32,
    /// Cache size descriptions as free-form strings; empty by default.
    pub cache_sizes: Vec<String>,
    /// SIMD capability names as free-form strings; empty by default.
    pub simd_capabilities: Vec<String>,
}
145
/// GPU description stored in [`HardwareConfig::gpu_info`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuInfo {
    /// Model name.
    pub model: String,
    /// GPU memory in megabytes.
    pub memory_mb: usize,
    /// Compute capability, kept as a free-form string.
    pub compute_capability: String,
    /// CUDA version string, if any.
    pub cuda_version: Option<String>,
    /// Driver version string.
    pub driver_version: String,
}
160
/// Memory sizing and allocation settings.
///
/// The defaults quoted below are fixed constants from this type's `Default`
/// implementation; they are not probed from the running system.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
    /// Total memory in megabytes; defaults to 8192.
    pub total_memory_mb: usize,
    /// Available memory in megabytes; defaults to 6144.
    pub available_memory_mb: usize,
    /// Allocation strategy; defaults to `MemoryAllocationStrategy::Standard`.
    pub allocation_strategy: MemoryAllocationStrategy,
    /// Optional pool size in megabytes; defaults to `None`.
    pub pool_size_mb: Option<usize>,
}
173
/// Memory allocation strategy named in [`MemoryConfig`].
///
/// This file only stores the choice; no allocation behaviour is implemented here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MemoryAllocationStrategy {
    /// The value used by `MemoryConfig::default()`.
    Standard,
    /// Pooled allocation.
    Pooled,
    /// Memory-mapped allocation.
    MemoryMapped,
    /// Compressed storage.
    Compressed,
}
186
/// Threading / parallelism settings.
///
/// The defaults quoted below come from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelConfig {
    /// Thread count; defaults to `std::thread::available_parallelism()` (1 if unavailable).
    pub num_threads: usize,
    /// Optional thread-affinity list (presumably CPU indices — confirm); defaults to `None`.
    pub thread_affinity: Option<Vec<usize>>,
    /// Whether work stealing is enabled; defaults to `true`.
    pub work_stealing: bool,
    /// Optional chunk size; defaults to `None`.
    pub chunk_size: Option<usize>,
}
199
/// Description of the dataset an experiment uses.
///
/// The defaults quoted below come from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetInfo {
    /// Dataset name; defaults to `"Unknown"`. Printed in the experiment report.
    pub name: String,
    /// Free-form description; empty by default.
    pub description: String,
    /// Where the data comes from, as free-form text; empty by default.
    pub source: String,
    /// Dataset version string; defaults to `"1.0"`.
    pub version: String,
    /// Number of samples; defaults to 0. Printed in the experiment report.
    pub num_samples: usize,
    /// Number of features; defaults to 0. Printed in the experiment report.
    pub num_features: usize,
    /// Number of classes, if applicable; defaults to `None`.
    pub num_classes: Option<usize>,
    /// Kind of data; defaults to `DataType::Tabular`.
    pub data_type: DataType,
    /// Summary statistics; defaults to `DatasetStatistics::default()`.
    pub statistics: DatasetStatistics,
    /// Preprocessing steps; empty by default.
    pub preprocessing: Vec<PreprocessingStep>,
}
224
/// Kind of data contained in a dataset.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum DataType {
    /// Tabular data; the value used by `DatasetInfo::default()`.
    Tabular,
    /// Image data.
    Image,
    /// Text data.
    Text,
    /// Audio data.
    Audio,
    /// Video data.
    Video,
    /// Time-series data.
    TimeSeries,
    /// Graph-structured data.
    Graph,
    /// A mix of several kinds of data.
    MultiModal,
}
245
/// Summary statistics of a dataset.
///
/// `Default` is derived: every vector starts empty and every option is `None`.
/// Nothing in this file populates or reads these values.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DatasetStatistics {
    /// Feature means (presumably one entry per feature — confirm).
    pub feature_means: Vec<f64>,
    /// Feature standard deviations (presumably one entry per feature — confirm).
    pub feature_stds: Vec<f64>,
    /// Feature ranges as pairs (presumably `(min, max)` per feature — confirm).
    pub feature_ranges: Vec<(f64, f64)>,
    /// Optional count per class label.
    pub class_distribution: Option<HashMap<String, usize>>,
    /// Missing-value counts (presumably one entry per feature — confirm).
    pub missing_values: Vec<usize>,
    /// Optional correlation matrix.
    pub correlation_matrix: Option<Array2<f64>>,
}
262
/// One preprocessing step listed in [`DatasetInfo::preprocessing`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PreprocessingStep {
    /// Step name.
    pub name: String,
    /// Free-form description of the step.
    pub description: String,
    /// Step parameters as arbitrary JSON values keyed by parameter name.
    pub parameters: HashMap<String, serde_json::Value>,
    /// Ordering index of the step (presumably its position in the pipeline — confirm).
    pub order: usize,
}
275
/// Outcome of one recorded run, stored in [`Experiment::results`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentResult {
    /// Identifier of the run.
    pub run_id: String,
    /// Optimizer name; the experiment report groups results by this value.
    pub optimizer_name: String,
    /// UTC time at which the run started.
    pub start_time: DateTime<Utc>,
    /// UTC time at which the run ended, if it has ended.
    pub end_time: Option<DateTime<Utc>>,
    /// Run outcome; only `RunStatus::Success` runs feed the report's metric statistics.
    pub status: RunStatus,
    /// Final metric values keyed by metric name; the report reads these for each
    /// name in `Experiment::metrics`.
    pub final_metrics: HashMap<String, f64>,
    /// Recorded training series.
    pub training_history: TrainingHistory,
    /// Resource usage recorded for the run.
    pub resource_usage: ResourceUsage,
    /// Error description, if any.
    pub error_info: Option<String>,
    /// Arbitrary extra JSON values keyed by name.
    pub metadata: HashMap<String, serde_json::Value>,
}
300
/// Outcome of a single run.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum RunStatus {
    /// The run succeeded; the only status counted as successful by the report.
    Success,
    /// The run failed.
    Failed,
    /// The run was terminated.
    Terminated,
    /// The run timed out.
    Timeout,
    /// The run was cancelled.
    Cancelled,
}
315
/// Series recorded while training.
///
/// Nothing in this file fills or reads these vectors. They look like parallel
/// series with one entry per recorded step, but that is an assumption to
/// confirm against the code that writes them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingHistory {
    /// Epoch indices.
    pub epochs: Vec<usize>,
    /// Training metric series keyed by metric name.
    pub train_metrics: HashMap<String, Vec<f64>>,
    /// Validation metric series keyed by metric name.
    pub val_metrics: HashMap<String, Vec<f64>>,
    /// Learning-rate series.
    pub learning_rates: Vec<f64>,
    /// Gradient-norm series.
    pub gradient_norms: Vec<f64>,
    /// Parameter-norm series.
    pub parameter_norms: Vec<f64>,
    /// Step-time series (unit not established in this file).
    pub step_times: Vec<f64>,
}
334
/// Resource usage summary, as produced by `ResourceMonitor::stop_monitoring`.
///
/// `Default` is derived: numeric fields are zero and options are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ResourceUsage {
    /// Largest CPU usage sample (unit not established in this file).
    pub peak_cpu_usage: f64,
    /// Mean of the CPU usage samples (0.0 when there are none).
    pub avg_cpu_usage: f64,
    /// Largest memory sample, in megabytes.
    pub peak_memory_mb: usize,
    /// Integer mean of the memory samples, in megabytes (0 when there are none).
    pub avg_memory_mb: usize,
    /// Peak GPU memory in megabytes, if known.
    pub peak_gpu_memory_mb: Option<usize>,
    /// Total elapsed time in seconds.
    pub total_time_seconds: f64,
    /// Energy consumption in joules, if known.
    pub energy_consumption_joules: Option<f64>,
}
353
/// Information recorded to help reproduce an experiment.
///
/// `Default` is derived: strings and maps start empty, `git_commit` is `None`,
/// and the nested types use their own defaults.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ReproducibilityInfo {
    /// Hash identifying the environment (how it is computed is not shown in this file).
    pub environment_hash: String,
    /// Git commit identifier, if known.
    pub git_commit: Option<String>,
    /// Checksum of the code (how it is computed is not shown in this file).
    pub code_checksum: String,
    /// Dependency versions keyed by dependency name.
    pub dependency_versions: HashMap<String, String>,
    /// Description of the host system.
    pub system_info: SystemInfo,
    /// Reproducibility checklist flags.
    pub checklist: ReproducibilityChecklist,
}
370
/// Description of the host system.
///
/// The defaults quoted below come from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// Operating system name; defaults to `std::env::consts::OS`.
    pub os: String,
    /// Operating system version; empty by default.
    pub os_version: String,
    /// CPU architecture; defaults to `std::env::consts::ARCH`.
    pub architecture: String,
    /// Host name; empty by default.
    pub hostname: String,
    /// User name; the default reads it from the process environment and falls
    /// back to `"unknown"`.
    pub username: String,
    /// Time zone; empty by default.
    pub timezone: String,
}
387
/// Yes/no reproducibility checklist.
///
/// `Default` is derived, so every flag starts as `false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ReproducibilityChecklist {
    /// A random seed has been set.
    pub random_seed_set: bool,
    /// Dependency versions are pinned.
    pub dependencies_pinned: bool,
    /// The data is under version control.
    pub data_version_controlled: bool,
    /// The code is under version control.
    pub code_version_controlled: bool,
    /// The environment is documented.
    pub environment_documented: bool,
    /// The hardware is documented.
    pub hardware_documented: bool,
    /// The results are archived.
    pub results_archived: bool,
}
406
/// Timestamps and durations of an experiment's lifecycle.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentTimeline {
    /// UTC creation time; set by `Experiment::new`.
    pub created_at: DateTime<Utc>,
    /// UTC start time; set by `Experiment::start`.
    pub started_at: Option<DateTime<Utc>>,
    /// UTC completion time; set by `Experiment::complete`.
    pub completed_at: Option<DateTime<Utc>>,
    /// Estimated duration; never assigned by the methods in this file.
    pub estimated_duration: Option<chrono::Duration>,
    /// `completed_at - started_at`; computed by `Experiment::complete` when both are set.
    pub actual_duration: Option<chrono::Duration>,
}
421
/// A note attached to an experiment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentNote {
    /// UTC time of the note; `Experiment::add_note` uses the current time.
    pub timestamp: DateTime<Utc>,
    /// Author of the note; printed in the report.
    pub author: String,
    /// Text of the note; printed in the report.
    pub content: String,
    /// Category of the note.
    pub note_type: NoteType,
    /// Identifier of a related run, if any; `Experiment::add_note` leaves it `None`.
    pub run_id: Option<String>,
}
436
/// Category of an [`ExperimentNote`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum NoteType {
    /// An observation.
    Observation,
    /// A problem that was encountered.
    Issue,
    /// A solution.
    Solution,
    /// A hypothesis.
    Hypothesis,
    /// A conclusion.
    Conclusion,
    /// An open question.
    Question,
    /// A reminder.
    Reminder,
}
455
/// Descriptive metadata of an experiment.
///
/// `Default` is derived: every list starts empty and `research_question` is
/// an empty string. All entries are free-form strings.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ExperimentMetadata {
    /// Tags.
    pub tags: Vec<String>,
    /// The research question.
    pub research_question: String,
    /// Expected outcomes.
    pub expected_outcomes: Vec<String>,
    /// Success criteria.
    pub success_criteria: Vec<String>,
    /// Related experiments (presumably experiment ids or names — confirm).
    pub related_experiments: Vec<String>,
    /// References.
    pub references: Vec<String>,
}
472
/// Bundles an experiment with a resource monitor and an optional progress callback.
///
/// `Debug` is implemented by hand because the boxed callback is not `Debug`.
/// No constructor or methods for this type appear in this part of the file.
pub struct ExperimentRunner {
    /// The experiment being run.
    experiment: Experiment,
    /// Collector of resource-usage samples.
    resource_monitor: ResourceMonitor,
    /// Optional callback taking an `f64` (presumably a progress value; range
    /// not established here — confirm).
    progress_callback: Option<Box<dyn Fn(f64) + Send + Sync>>,
}
482
483impl std::fmt::Debug for ExperimentRunner {
484 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
485 f.debug_struct("ExperimentRunner")
486 .field("experiment", &self.experiment)
487 .field("resource_monitor", &self.resource_monitor)
488 .field("progress_callback", &self.progress_callback.is_some())
489 .finish()
490 }
491}
492
/// Holds resource-usage samples and summarises them into a [`ResourceUsage`].
#[derive(Debug)]
pub struct ResourceMonitor {
    /// CPU usage samples (unit not established in this file).
    cpu_usage: Vec<f64>,
    /// Memory samples; summarised into the `*_memory_mb` fields, so presumably megabytes.
    memory_usage: Vec<usize>,
    /// GPU memory samples; an entry may be `None`.
    gpu_memory_usage: Vec<Option<usize>>,
    /// Sampling interval in seconds, as passed to `ResourceMonitor::new`.
    interval_seconds: u64,
}
505
506impl Experiment {
507 pub fn new(name: &str) -> Self {
509 let now = Utc::now();
510 Self {
511 id: uuid::Uuid::new_v4().to_string(),
512 name: name.to_string(),
513 hypothesis: String::new(),
514 description: String::new(),
515 status: ExperimentStatus::Planning,
516 config: ExperimentConfig::default(),
517 optimizer_configs: HashMap::new(),
518 dataset_info: DatasetInfo::default(),
519 metrics: Vec::new(),
520 results: Vec::new(),
521 reproducibility: ReproducibilityInfo::default(),
522 timeline: ExperimentTimeline {
523 created_at: now,
524 started_at: None,
525 completed_at: None,
526 estimated_duration: None,
527 actual_duration: None,
528 },
529 notes: Vec::new(),
530 metadata: ExperimentMetadata::default(),
531 }
532 }
533
534 pub fn hypothesis(mut self, hypothesis: &str) -> Self {
536 self.hypothesis = hypothesis.to_string();
537 self
538 }
539
540 pub fn description(mut self, description: &str) -> Self {
542 self.description = description.to_string();
543 self
544 }
545
546 pub fn add_optimizer_config(mut self, name: &str, config: OptimizerConfig<f64>) -> Self {
548 self.optimizer_configs.insert(name.to_string(), config);
549 self
550 }
551
552 pub fn dataset(mut self, datasetinfo: DatasetInfo) -> Self {
554 self.dataset_info = datasetinfo;
555 self
556 }
557
558 pub fn metrics(mut self, metrics: Vec<String>) -> Self {
560 self.metrics = metrics;
561 self
562 }
563
564 pub fn add_note(&mut self, author: &str, content: &str, notetype: NoteType) {
566 let note = ExperimentNote {
567 timestamp: Utc::now(),
568 author: author.to_string(),
569 content: content.to_string(),
570 note_type: notetype,
571 run_id: None,
572 };
573 self.notes.push(note);
574 }
575
576 pub fn start(&mut self) -> Result<()> {
578 if self.status != ExperimentStatus::Ready && self.status != ExperimentStatus::Planning {
579 return Err(OptimError::InvalidConfig(format!(
580 "Cannot start experiment in status {:?}",
581 self.status
582 )));
583 }
584
585 self.status = ExperimentStatus::Running;
586 self.timeline.started_at = Some(Utc::now());
587
588 Ok(())
589 }
590
591 pub fn complete(&mut self) -> Result<()> {
593 if self.status != ExperimentStatus::Running {
594 return Err(OptimError::InvalidConfig(format!(
595 "Cannot complete experiment in status {:?}",
596 self.status
597 )));
598 }
599
600 self.status = ExperimentStatus::Completed;
601 self.timeline.completed_at = Some(Utc::now());
602
603 if let (Some(start), Some(end)) = (self.timeline.started_at, self.timeline.completed_at) {
604 self.timeline.actual_duration = Some(end - start);
605 }
606
607 Ok(())
608 }
609
610 pub fn generate_report(&self) -> String {
612 let mut report = String::new();
613
614 report.push_str(&format!("# Experiment Report: {}\n\n", self.name));
615 report.push_str(&format!("**ID**: {}\n", self.id));
616 report.push_str(&format!("**Status**: {:?}\n", self.status));
617 report.push_str(&format!("**Hypothesis**: {}\n\n", self.hypothesis));
618
619 if !self.description.is_empty() {
620 report.push_str(&format!("## Description\n\n{}\n\n", self.description));
621 }
622
623 report.push_str("## Configuration\n\n");
624 report.push_str(&format!("- **Random Seed**: {}\n", self.config.random_seed));
625 report.push_str(&format!("- **Number of Runs**: {}\n", self.config.num_runs));
626 report.push_str(&format!("- **Max Epochs**: {}\n", self.config.max_epochs));
627
628 report.push_str("\n## Optimizers\n\n");
629 for name in self.optimizer_configs.keys() {
630 report.push_str(&format!("- {}\n", name));
631 }
632
633 report.push_str("\n## Dataset\n\n");
634 report.push_str(&format!("- **Name**: {}\n", self.dataset_info.name));
635 report.push_str(&format!(
636 "- **Samples**: {}\n",
637 self.dataset_info.num_samples
638 ));
639 report.push_str(&format!(
640 "- **Features**: {}\n",
641 self.dataset_info.num_features
642 ));
643
644 report.push_str("\n## Results\n\n");
645 report.push_str(&format!("**Total Runs**: {}\n\n", self.results.len()));
646
647 let mut optimizer_results: HashMap<String, Vec<&ExperimentResult>> = HashMap::new();
649 for result in &self.results {
650 optimizer_results
651 .entry(result.optimizer_name.clone())
652 .or_default()
653 .push(result);
654 }
655
656 for (optimizer, results) in optimizer_results {
657 report.push_str(&format!("### {}\n\n", optimizer));
658
659 if !results.is_empty() {
660 let successful_runs: Vec<&ExperimentResult> = results
662 .iter()
663 .filter(|r| r.status == RunStatus::Success)
664 .copied()
665 .collect();
666
667 report.push_str(&format!(
668 "- **Successful Runs**: {}/{}\n",
669 successful_runs.len(),
670 results.len()
671 ));
672
673 if !successful_runs.is_empty() {
674 for metric in &self.metrics {
675 if let Some(values) = self.get_metric_values(&successful_runs, metric) {
676 let mean = values.iter().sum::<f64>() / values.len() as f64;
677 let std = (values.iter().map(|v| (v - mean).powi(2)).sum::<f64>()
678 / values.len() as f64)
679 .sqrt();
680 report
681 .push_str(&format!("- **{}**: {:.4} ± {:.4}\n", metric, mean, std));
682 }
683 }
684 }
685 }
686 report.push('\n');
687 }
688
689 if !self.notes.is_empty() {
690 report.push_str("## Notes\n\n");
691 for note in &self.notes {
692 report.push_str(&format!(
693 "**{}** ({}): {}\n\n",
694 note.author,
695 note.timestamp.format("%Y-%m-%d %H:%M"),
696 note.content
697 ));
698 }
699 }
700
701 report
702 }
703
704 fn get_metric_values(&self, results: &[&ExperimentResult], metric: &str) -> Option<Vec<f64>> {
705 let mut values = Vec::new();
706 for result in results {
707 if let Some(&value) = result.final_metrics.get(metric) {
708 values.push(value);
709 }
710 }
711 if values.is_empty() {
712 None
713 } else {
714 Some(values)
715 }
716 }
717}
718
719impl Default for ExperimentConfig {
720 fn default() -> Self {
721 Self {
722 random_seed: 42,
723 num_runs: 1,
724 max_epochs: 100,
725 early_stopping: None,
726 hardware_config: HardwareConfig::default(),
727 environment: HashMap::new(),
728 validation_split: 0.2,
729 test_split: 0.1,
730 cv_folds: None,
731 }
732 }
733}
734
735impl Default for CpuInfo {
736 fn default() -> Self {
737 Self {
738 model: "Unknown".to_string(),
739 cores: std::thread::available_parallelism()
740 .map(|p| p.get())
741 .unwrap_or(1),
742 threads: std::thread::available_parallelism()
743 .map(|p| p.get())
744 .unwrap_or(1),
745 frequency_mhz: 0,
746 cache_sizes: Vec::new(),
747 simd_capabilities: Vec::new(),
748 }
749 }
750}
751
752impl Default for MemoryConfig {
753 fn default() -> Self {
754 Self {
755 total_memory_mb: 8192, available_memory_mb: 6144, allocation_strategy: MemoryAllocationStrategy::Standard,
758 pool_size_mb: None,
759 }
760 }
761}
762
763impl Default for ParallelConfig {
764 fn default() -> Self {
765 Self {
766 num_threads: std::thread::available_parallelism()
767 .map(|p| p.get())
768 .unwrap_or(1),
769 thread_affinity: None,
770 work_stealing: true,
771 chunk_size: None,
772 }
773 }
774}
775
776impl Default for DatasetInfo {
777 fn default() -> Self {
778 Self {
779 name: "Unknown".to_string(),
780 description: String::new(),
781 source: String::new(),
782 version: "1.0".to_string(),
783 num_samples: 0,
784 num_features: 0,
785 num_classes: None,
786 data_type: DataType::Tabular,
787 statistics: DatasetStatistics::default(),
788 preprocessing: Vec::new(),
789 }
790 }
791}
792
793impl Default for SystemInfo {
794 fn default() -> Self {
795 Self {
796 os: std::env::consts::OS.to_string(),
797 os_version: String::new(),
798 architecture: std::env::consts::ARCH.to_string(),
799 hostname: String::new(),
800 username: std::env::var("USER").unwrap_or_else(|_| "unknown".to_string()),
801 timezone: String::new(),
802 }
803 }
804}
805
806impl ResourceMonitor {
807 pub fn new(_intervalseconds: u64) -> Self {
809 Self {
810 cpu_usage: Vec::new(),
811 memory_usage: Vec::new(),
812 gpu_memory_usage: Vec::new(),
813 interval_seconds: _intervalseconds,
814 }
815 }
816
817 pub fn start_monitoring(&mut self) {
819 }
822
823 pub fn stop_monitoring(&self) -> ResourceUsage {
825 let peak_cpu = self.cpu_usage.iter().fold(0.0f64, |a, &b| a.max(b));
826 let avg_cpu = if self.cpu_usage.is_empty() {
827 0.0
828 } else {
829 self.cpu_usage.iter().sum::<f64>() / self.cpu_usage.len() as f64
830 };
831
832 let peak_memory = self.memory_usage.iter().fold(0usize, |a, &b| a.max(b));
833 let avg_memory = if self.memory_usage.is_empty() {
834 0
835 } else {
836 self.memory_usage.iter().sum::<usize>() / self.memory_usage.len()
837 };
838
839 ResourceUsage {
840 peak_cpu_usage: peak_cpu,
841 avg_cpu_usage: avg_cpu,
842 peak_memory_mb: peak_memory,
843 avg_memory_mb: avg_memory,
844 peak_gpu_memory_mb: None, total_time_seconds: 0.0, energy_consumption_joules: None,
847 }
848 }
849}
850
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder-style setters store their arguments and a new experiment
    /// starts in `Planning`.
    #[test]
    fn test_experiment_creation() {
        let metric_names = vec![String::from("accuracy"), String::from("loss")];

        let exp = Experiment::new("Test Experiment")
            .hypothesis("Test hypothesis")
            .description("Test description")
            .metrics(metric_names);

        assert_eq!(exp.name, "Test Experiment");
        assert_eq!(exp.hypothesis, "Test hypothesis");
        assert_eq!(exp.description, "Test description");
        assert_eq!(exp.metrics.len(), 2);
        assert_eq!(exp.status, ExperimentStatus::Planning);
    }

    /// `Ready` -> `Running` -> `Completed`, with the timeline filled in along the way.
    #[test]
    fn test_experiment_lifecycle() {
        let mut exp = Experiment::new("Lifecycle Test");
        exp.status = ExperimentStatus::Ready;

        let started = exp.start();
        assert!(started.is_ok());
        assert_eq!(exp.status, ExperimentStatus::Running);
        assert!(exp.timeline.started_at.is_some());

        let completed = exp.complete();
        assert!(completed.is_ok());
        assert_eq!(exp.status, ExperimentStatus::Completed);
        assert!(exp.timeline.completed_at.is_some());
        assert!(exp.timeline.actual_duration.is_some());
    }

    /// Notes are appended in call order and keep their type.
    #[test]
    fn test_experiment_notes() {
        let mut exp = Experiment::new("Notes Test");

        exp.add_note("Researcher", "Initial observation", NoteType::Observation);
        exp.add_note("Researcher", "Found an issue", NoteType::Issue);

        assert_eq!(exp.notes.len(), 2);
        assert_eq!(exp.notes[0].note_type, NoteType::Observation);
        assert_eq!(exp.notes[1].note_type, NoteType::Issue);
    }
}