use crate::error::{OptimError, Result};
use crate::optimizers::*;
use crate::unified_api::OptimizerConfig;
use chrono::{DateTime, Utc};
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::numeric::Float;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// A single optimization experiment: descriptive text, configuration, the
/// optimizers under comparison, collected run results, notes and metadata.
///
/// Built with [`Experiment::new`] plus the by-value builder methods
/// (`hypothesis`, `description`, `add_optimizer_config`, `dataset`, `metrics`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Experiment {
/// Unique identifier; `Experiment::new` sets it to a freshly generated v4 UUID string.
pub id: String,
/// Experiment name; used as the title of the generated report.
pub name: String,
/// Hypothesis text shown in the report header.
pub hypothesis: String,
/// Free-form description; the report includes it only when non-empty.
pub description: String,
/// Current lifecycle state; checked and advanced by `start` and `complete`.
pub status: ExperimentStatus,
/// Run configuration (seed, run count, epoch limit, splits, hardware).
pub config: ExperimentConfig,
/// Optimizer configurations keyed by a caller-chosen name.
pub optimizer_configs: HashMap<String, OptimizerConfig<f64>>,
/// Description of the dataset the experiment uses.
pub dataset_info: DatasetInfo,
/// Metric names summarized by `generate_report`; each is looked up as a key
/// in a run's `final_metrics`.
pub metrics: Vec<String>,
/// Results of individual runs; the report groups them by `optimizer_name`.
pub results: Vec<ExperimentResult>,
/// Information recorded for reproducing the experiment.
pub reproducibility: ReproducibilityInfo,
/// Creation/start/completion timestamps and durations.
pub timeline: ExperimentTimeline,
/// Notes appended through `add_note`.
pub notes: Vec<ExperimentNote>,
/// Tags, research question and related bookkeeping.
pub metadata: ExperimentMetadata,
}
/// Lifecycle state of an [`Experiment`].
///
/// Within this file only three assignments are implemented: `Experiment::new`
/// starts in `Planning`, `Experiment::start` moves `Planning`/`Ready` to
/// `Running`, and `Experiment::complete` moves `Running` to `Completed`.
/// The remaining variants are not assigned by any code visible here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ExperimentStatus {
/// Initial state set by `Experiment::new`; `start` accepts it.
Planning,
/// Accepted by `start` as a valid starting state.
Ready,
/// Set by `start`; the only state from which `complete` succeeds.
Running,
/// Set by `complete`.
Completed,
/// Presumably: the experiment ended with an error (not set in this file).
Failed,
/// Presumably: execution is temporarily suspended (not set in this file).
Paused,
/// Presumably: the experiment was abandoned (not set in this file).
Cancelled,
/// Presumably: results are being analyzed (not set in this file).
Analyzing,
/// Presumably: results have been published (not set in this file).
Published,
}
/// Run-level settings of an experiment.
///
/// The `Default` impl in this file uses seed 42, a single run, 100 epochs,
/// no early stopping, a 0.2 validation split, a 0.1 test split and no
/// cross-validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentConfig {
/// Seed for random number generation (default 42).
pub random_seed: u64,
/// Number of runs (default 1); printed in the report as "Number of Runs".
pub num_runs: usize,
/// Epoch limit (default 100); printed in the report as "Max Epochs".
pub max_epochs: usize,
/// Optional early-stopping settings; `None` by default.
pub early_stopping: Option<EarlyStoppingConfig>,
/// Description of the hardware the experiment runs on.
pub hardware_config: HardwareConfig,
/// String key/value pairs; presumably environment variables or settings — TODO confirm.
pub environment: HashMap<String, String>,
/// Validation split (default 0.2); presumably a fraction of the data — TODO confirm.
pub validation_split: f64,
/// Test split (default 0.1); presumably a fraction of the data — TODO confirm.
pub test_split: f64,
/// Optional number of cross-validation folds; `None` by default.
pub cv_folds: Option<usize>,
}
/// Settings for early stopping.
///
/// NOTE(review): no code in this file consumes these values, so the exact
/// stopping rule is defined elsewhere; the field notes below are assumptions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EarlyStoppingConfig {
/// Name of the metric to watch.
pub monitor_metric: String,
/// Presumably the number of non-improving evaluations tolerated — TODO confirm.
pub patience: usize,
/// Presumably the smallest change that counts as an improvement — TODO confirm.
pub min_improvement: f64,
/// Whether the monitored metric is minimized or maximized.
pub mode: OptimizationMode,
}
/// Direction of optimization, used as [`EarlyStoppingConfig::mode`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum OptimizationMode {
/// Presumably: smaller metric values are better.
Minimize,
/// Presumably: larger metric values are better.
Maximize,
}
/// Hardware description attached to an [`ExperimentConfig`].
///
/// `Default` is derived, so it combines the defaults of the nested types and
/// leaves `gpu_info` as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct HardwareConfig {
/// CPU description.
pub cpu_info: CpuInfo,
/// GPU description, if any.
pub gpu_info: Option<GpuInfo>,
/// Memory sizes and allocation strategy.
pub memory_config: MemoryConfig,
/// Threading settings.
pub parallel_config: ParallelConfig,
}
/// CPU description.
///
/// The `Default` impl in this file sets `model` to `"Unknown"`, fills both
/// `cores` and `threads` from `std::thread::available_parallelism()` (falling
/// back to 1), and leaves the frequency at 0 and the lists empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpuInfo {
/// CPU model name.
pub model: String,
/// Core count.
pub cores: usize,
/// Thread count.
pub threads: usize,
/// Clock frequency; MHz according to the field name. 0 by default.
pub frequency_mhz: u32,
/// Cache sizes as free-form strings (format not defined in this file).
pub cache_sizes: Vec<String>,
/// SIMD capability names as free-form strings (format not defined in this file).
pub simd_capabilities: Vec<String>,
}
/// GPU description; optional part of [`HardwareConfig`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuInfo {
/// GPU model name.
pub model: String,
/// GPU memory size; megabytes according to the field name.
pub memory_mb: usize,
/// Compute capability as a free-form string.
pub compute_capability: String,
/// CUDA version, when known.
pub cuda_version: Option<String>,
/// Driver version string.
pub driver_version: String,
}
/// Memory sizes and allocation strategy.
///
/// The `Default` impl in this file uses fixed values (8192 total, 6144
/// available, `Standard` strategy, no pool) rather than querying the system.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
/// Total memory; megabytes according to the field name (default 8192).
pub total_memory_mb: usize,
/// Available memory; megabytes according to the field name (default 6144).
pub available_memory_mb: usize,
/// Allocation strategy (default `Standard`).
pub allocation_strategy: MemoryAllocationStrategy,
/// Optional pool size; megabytes according to the field name (default `None`).
pub pool_size_mb: Option<usize>,
}
/// Memory allocation strategy recorded in [`MemoryConfig`].
///
/// NOTE(review): this file only stores the value; what each strategy does is
/// defined elsewhere, if at all.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum MemoryAllocationStrategy {
/// The value used by `MemoryConfig::default()`.
Standard,
/// Presumably: allocations are served from a pool.
Pooled,
/// Presumably: memory-mapped storage.
MemoryMapped,
/// Presumably: compressed in-memory storage.
Compressed,
}
/// Threading settings.
///
/// The `Default` impl in this file sets `num_threads` from
/// `std::thread::available_parallelism()` (falling back to 1), enables
/// `work_stealing`, and leaves affinity and chunk size unset.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelConfig {
/// Number of threads.
pub num_threads: usize,
/// Optional thread affinity; presumably a list of CPU indices — TODO confirm.
pub thread_affinity: Option<Vec<usize>>,
/// Whether work stealing is enabled (default `true`).
pub work_stealing: bool,
/// Optional chunk size (unit not defined in this file).
pub chunk_size: Option<usize>,
}
/// Description of the dataset used by an [`Experiment`].
///
/// The `Default` impl in this file uses the name `"Unknown"`, version
/// `"1.0"`, zero samples/features, `DataType::Tabular` and empty values for
/// everything else.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetInfo {
/// Dataset name; printed in the report's "Dataset" section.
pub name: String,
/// Free-form description.
pub description: String,
/// Where the dataset comes from (free-form).
pub source: String,
/// Dataset version string (default `"1.0"`).
pub version: String,
/// Number of samples; printed in the report.
pub num_samples: usize,
/// Number of features; printed in the report.
pub num_features: usize,
/// Number of classes, when applicable.
pub num_classes: Option<usize>,
/// Kind of data the dataset holds.
pub data_type: DataType,
/// Summary statistics of the dataset.
pub statistics: DatasetStatistics,
/// Preprocessing steps applied to the data.
pub preprocessing: Vec<PreprocessingStep>,
}
/// Kind of data a dataset contains; `DatasetInfo::default()` uses `Tabular`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum DataType {
/// Tabular data (the default used by `DatasetInfo::default()`).
Tabular,
/// Image data.
Image,
/// Text data.
Text,
/// Audio data.
Audio,
/// Video data.
Video,
/// Time-series data.
TimeSeries,
/// Graph-structured data.
Graph,
/// A combination of several modalities.
MultiModal,
}
/// Summary statistics of a dataset.
///
/// `Default` is derived: all vectors are empty and both options are `None`.
/// NOTE(review): the per-feature vectors are presumably indexed by feature
/// position; nothing in this file enforces matching lengths.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DatasetStatistics {
/// Mean values, presumably one per feature.
pub feature_means: Vec<f64>,
/// Standard deviations, presumably one per feature.
pub feature_stds: Vec<f64>,
/// Value ranges as pairs; presumably `(min, max)` per feature — TODO confirm order.
pub feature_ranges: Vec<(f64, f64)>,
/// Optional counts keyed by a string; presumably class label to sample count.
pub class_distribution: Option<HashMap<String, usize>>,
/// Missing-value counts, presumably one per feature.
pub missing_values: Vec<usize>,
/// Optional correlation matrix; presumably features x features — TODO confirm shape.
pub correlation_matrix: Option<Array2<f64>>,
}
/// One preprocessing step listed in [`DatasetInfo::preprocessing`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PreprocessingStep {
/// Step name.
pub name: String,
/// Free-form description of the step.
pub description: String,
/// Step parameters as arbitrary JSON values keyed by parameter name.
pub parameters: HashMap<String, serde_json::Value>,
/// Ordering value; presumably the position of the step in the pipeline — TODO confirm.
pub order: usize,
}
/// Outcome of a single run of one optimizer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentResult {
/// Identifier of the run.
pub run_id: String,
/// Optimizer name; `Experiment::generate_report` groups results by this value.
pub optimizer_name: String,
/// When the run started (UTC).
pub start_time: DateTime<Utc>,
/// When the run ended (UTC), if it has ended.
pub end_time: Option<DateTime<Utc>>,
/// Run outcome; only `RunStatus::Success` runs contribute to report statistics.
pub status: RunStatus,
/// Final metric values keyed by metric name; the report reads the entries
/// named in `Experiment::metrics`.
pub final_metrics: HashMap<String, f64>,
/// Recorded training series for the run.
pub training_history: TrainingHistory,
/// Resource consumption summary for the run.
pub resource_usage: ResourceUsage,
/// Error description, if any.
pub error_info: Option<String>,
/// Additional arbitrary JSON metadata.
pub metadata: HashMap<String, serde_json::Value>,
}
/// Outcome of a single run.
///
/// `Experiment::generate_report` counts a run as successful only when its
/// status is `Success`; all other variants are treated alike there.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum RunStatus {
/// The run succeeded; included in the report's metric statistics.
Success,
/// The run failed.
Failed,
/// Presumably: the run was stopped before finishing — TODO confirm.
Terminated,
/// Presumably: the run exceeded a time limit — TODO confirm.
Timeout,
/// The run was cancelled.
Cancelled,
}
/// Series recorded while training.
///
/// NOTE(review): the vectors are presumably parallel (entry `i` of each
/// belongs to `epochs[i]`), but nothing in this file enforces or relies on
/// that — confirm against the code that fills them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingHistory {
/// Epoch indices.
pub epochs: Vec<usize>,
/// Training metric series keyed by metric name.
pub train_metrics: HashMap<String, Vec<f64>>,
/// Validation metric series keyed by metric name.
pub val_metrics: HashMap<String, Vec<f64>>,
/// Learning-rate values.
pub learning_rates: Vec<f64>,
/// Gradient norm values.
pub gradient_norms: Vec<f64>,
/// Parameter norm values.
pub parameter_norms: Vec<f64>,
/// Step durations (unit not defined in this file).
pub step_times: Vec<f64>,
}
/// Resource consumption summary, as returned by `ResourceMonitor::stop_monitoring`.
///
/// `Default` is derived: numeric fields are zero and options are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ResourceUsage {
/// Highest recorded CPU usage sample (scale — percent or fraction — is not
/// defined in this file).
pub peak_cpu_usage: f64,
/// Arithmetic mean of the recorded CPU usage samples.
pub avg_cpu_usage: f64,
/// Highest recorded memory sample; megabytes according to the field name.
pub peak_memory_mb: usize,
/// Mean of the recorded memory samples; megabytes according to the field name.
pub avg_memory_mb: usize,
/// Peak GPU memory, when available; megabytes according to the field name.
pub peak_gpu_memory_mb: Option<usize>,
/// Total elapsed time; seconds according to the field name.
pub total_time_seconds: f64,
/// Energy consumption, when available; joules according to the field name.
pub energy_consumption_joules: Option<f64>,
}
/// Information recorded so an experiment can be reproduced.
///
/// `Default` is derived: strings and the map are empty, `git_commit` is
/// `None`, and the nested types use their own defaults. Nothing in this file
/// computes the hash or checksum values.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ReproducibilityInfo {
/// Hash identifying the environment (algorithm not defined in this file).
pub environment_hash: String,
/// Git commit identifier, when known.
pub git_commit: Option<String>,
/// Checksum of the code (algorithm not defined in this file).
pub code_checksum: String,
/// Dependency versions; presumably dependency name to version string.
pub dependency_versions: HashMap<String, String>,
/// Operating system and host details.
pub system_info: SystemInfo,
/// Checklist of reproducibility practices.
pub checklist: ReproducibilityChecklist,
}
/// Operating system and host details.
///
/// The `Default` impl in this file fills `os` and `architecture` from
/// `std::env::consts`, derives `username` from the process environment, and
/// leaves `os_version`, `hostname` and `timezone` empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
/// Operating system name (default: `std::env::consts::OS`).
pub os: String,
/// Operating system version (empty by default).
pub os_version: String,
/// CPU architecture (default: `std::env::consts::ARCH`).
pub architecture: String,
/// Host name (empty by default).
pub hostname: String,
/// Name of the user running the experiment.
pub username: String,
/// Time zone (empty by default; format not defined in this file).
pub timezone: String,
}
/// Yes/no checklist of reproducibility practices.
///
/// `Default` is derived, so every item starts as `false`. No code in this
/// file sets these flags.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ReproducibilityChecklist {
/// A random seed has been set.
pub random_seed_set: bool,
/// Dependency versions are pinned.
pub dependencies_pinned: bool,
/// The data is under version control.
pub data_version_controlled: bool,
/// The code is under version control.
pub code_version_controlled: bool,
/// The environment is documented.
pub environment_documented: bool,
/// The hardware is documented.
pub hardware_documented: bool,
/// The results are archived.
pub results_archived: bool,
}
/// Timestamps and durations of an [`Experiment`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentTimeline {
/// Creation time (UTC); set by `Experiment::new`.
pub created_at: DateTime<Utc>,
/// Start time (UTC); set by `Experiment::start`.
pub started_at: Option<DateTime<Utc>>,
/// Completion time (UTC); set by `Experiment::complete`.
pub completed_at: Option<DateTime<Utc>>,
/// Estimated duration; never set by code in this file.
pub estimated_duration: Option<chrono::Duration>,
/// `completed_at - started_at`; set by `Experiment::complete` when a start
/// time is present.
pub actual_duration: Option<chrono::Duration>,
}
/// A timestamped note attached to an [`Experiment`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExperimentNote {
/// When the note was written (UTC); `Experiment::add_note` uses the current time.
pub timestamp: DateTime<Utc>,
/// Who wrote the note.
pub author: String,
/// Note text.
pub content: String,
/// Category of the note.
pub note_type: NoteType,
/// Run the note refers to, if any; `Experiment::add_note` always stores `None`.
pub run_id: Option<String>,
}
/// Category of an [`ExperimentNote`].
///
/// This file only stores the category; `generate_report` does not print it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum NoteType {
/// Something that was observed.
Observation,
/// A problem that was encountered.
Issue,
/// A fix or workaround.
Solution,
/// A hypothesis.
Hypothesis,
/// A conclusion.
Conclusion,
/// An open question.
Question,
/// A reminder.
Reminder,
}
/// Free-form bookkeeping attached to an [`Experiment`].
///
/// `Default` is derived: every field starts empty.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ExperimentMetadata {
/// Tags for the experiment.
pub tags: Vec<String>,
/// The research question being investigated.
pub research_question: String,
/// Outcomes expected before running.
pub expected_outcomes: Vec<String>,
/// Criteria that define success.
pub success_criteria: Vec<String>,
/// Related experiments; presumably experiment ids — TODO confirm.
pub related_experiments: Vec<String>,
/// Literature or other references.
pub references: Vec<String>,
}
/// Bundles an [`Experiment`] with a [`ResourceMonitor`] and an optional
/// progress callback.
///
/// NOTE(review): all fields are private and no constructor or methods are
/// visible in this part of the file; `Debug` is implemented by hand because
/// the boxed closure is not `Debug`.
pub struct ExperimentRunner {
// The experiment being run.
experiment: Experiment,
// Collects resource usage samples.
resource_monitor: ResourceMonitor,
// Optional callback taking an `f64`; presumably a progress value — TODO confirm range.
progress_callback: Option<Box<dyn Fn(f64) + Send + Sync>>,
}
impl std::fmt::Debug for ExperimentRunner {
    /// Formats the runner as a struct; the callback itself cannot be printed,
    /// so `progress_callback` is shown as a boolean telling whether one is set.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let has_callback = self.progress_callback.is_some();
        let mut out = formatter.debug_struct("ExperimentRunner");
        out.field("experiment", &self.experiment);
        out.field("resource_monitor", &self.resource_monitor);
        out.field("progress_callback", &has_callback);
        out.finish()
    }
}
/// Accumulates resource usage samples that `stop_monitoring` summarizes into
/// a [`ResourceUsage`].
///
/// NOTE(review): no code visible in this file ever pushes samples into the
/// vectors (`start_monitoring` is an empty stub).
#[derive(Debug)]
pub struct ResourceMonitor {
// CPU usage samples (scale not defined in this file).
cpu_usage: Vec<f64>,
// Memory usage samples; reported through the `*_memory_mb` fields of `ResourceUsage`.
memory_usage: Vec<usize>,
// GPU memory samples; `None` presumably means no reading was available.
gpu_memory_usage: Vec<Option<usize>>,
// Sampling interval passed to `new`; seconds according to the field name.
interval_seconds: u64,
}
impl Experiment {
    /// Creates a new experiment called `name`.
    ///
    /// The experiment receives a freshly generated v4 UUID as its `id`,
    /// starts in [`ExperimentStatus::Planning`], records the current UTC time
    /// as `timeline.created_at`, and uses empty/default values for everything
    /// else.
    pub fn new(name: &str) -> Self {
        let now = Utc::now();
        Self {
            id: uuid::Uuid::new_v4().to_string(),
            name: name.to_string(),
            hypothesis: String::new(),
            description: String::new(),
            status: ExperimentStatus::Planning,
            config: ExperimentConfig::default(),
            optimizer_configs: HashMap::new(),
            dataset_info: DatasetInfo::default(),
            metrics: Vec::new(),
            results: Vec::new(),
            reproducibility: ReproducibilityInfo::default(),
            timeline: ExperimentTimeline {
                created_at: now,
                started_at: None,
                completed_at: None,
                estimated_duration: None,
                actual_duration: None,
            },
            notes: Vec::new(),
            metadata: ExperimentMetadata::default(),
        }
    }

    /// Builder: sets the hypothesis text and returns the experiment.
    pub fn hypothesis(mut self, hypothesis: &str) -> Self {
        self.hypothesis = hypothesis.to_string();
        self
    }

    /// Builder: sets the description text and returns the experiment.
    pub fn description(mut self, description: &str) -> Self {
        self.description = description.to_string();
        self
    }

    /// Builder: registers `config` under `name`, replacing any configuration
    /// previously stored under the same name.
    pub fn add_optimizer_config(mut self, name: &str, config: OptimizerConfig<f64>) -> Self {
        self.optimizer_configs.insert(name.to_string(), config);
        self
    }

    /// Builder: replaces the dataset description.
    pub fn dataset(mut self, datasetinfo: DatasetInfo) -> Self {
        self.dataset_info = datasetinfo;
        self
    }

    /// Builder: replaces the list of metric names summarized in the report.
    pub fn metrics(mut self, metrics: Vec<String>) -> Self {
        self.metrics = metrics;
        self
    }

    /// Appends a note stamped with the current UTC time.
    ///
    /// The note is not tied to a particular run (`run_id` is `None`).
    pub fn add_note(&mut self, author: &str, content: &str, notetype: NoteType) {
        self.notes.push(ExperimentNote {
            timestamp: Utc::now(),
            author: author.to_string(),
            content: content.to_string(),
            note_type: notetype,
            run_id: None,
        });
    }

    /// Moves the experiment to [`ExperimentStatus::Running`] and records the
    /// start time.
    ///
    /// # Errors
    ///
    /// Returns [`OptimError::InvalidConfig`] unless the current status is
    /// `Ready` or `Planning`; the experiment is left untouched in that case.
    pub fn start(&mut self) -> Result<()> {
        let startable = matches!(
            self.status,
            ExperimentStatus::Ready | ExperimentStatus::Planning
        );
        if !startable {
            return Err(OptimError::InvalidConfig(format!(
                "Cannot start experiment in status {:?}",
                self.status
            )));
        }
        self.status = ExperimentStatus::Running;
        self.timeline.started_at = Some(Utc::now());
        Ok(())
    }

    /// Moves the experiment to [`ExperimentStatus::Completed`], records the
    /// completion time and, when a start time is present, the actual duration.
    ///
    /// # Errors
    ///
    /// Returns [`OptimError::InvalidConfig`] unless the current status is
    /// `Running`; the experiment is left untouched in that case.
    pub fn complete(&mut self) -> Result<()> {
        if self.status != ExperimentStatus::Running {
            return Err(OptimError::InvalidConfig(format!(
                "Cannot complete experiment in status {:?}",
                self.status
            )));
        }
        let completed_at = Utc::now();
        self.status = ExperimentStatus::Completed;
        self.timeline.completed_at = Some(completed_at);
        // `status` is a public field, so `Running` does not guarantee that
        // `start` was called; without a start time the duration stays as is.
        if let Some(started_at) = self.timeline.started_at {
            self.timeline.actual_duration = Some(completed_at - started_at);
        }
        Ok(())
    }

    /// Renders a Markdown report of the experiment.
    ///
    /// Sections: header (id, status, hypothesis), optional description,
    /// configuration, optimizers, dataset, per-optimizer results and notes.
    /// For every optimizer that has results, each metric named in
    /// `self.metrics` is summarized over the successful runs as
    /// `mean ± population standard deviation` (the variance divides by the
    /// number of values, not by `n - 1`). Metrics that no successful run
    /// reported are omitted.
    ///
    /// Optimizer names are listed in sorted order in both the "Optimizers"
    /// and "Results" sections, so the same experiment always yields the same
    /// text.
    pub fn generate_report(&self) -> String {
        let mut report = String::new();

        report.push_str(&format!("# Experiment Report: {}\n\n", self.name));
        report.push_str(&format!("**ID**: {}\n", self.id));
        report.push_str(&format!("**Status**: {:?}\n", self.status));
        report.push_str(&format!("**Hypothesis**: {}\n\n", self.hypothesis));
        if !self.description.is_empty() {
            report.push_str(&format!("## Description\n\n{}\n\n", self.description));
        }

        report.push_str("## Configuration\n\n");
        report.push_str(&format!("- **Random Seed**: {}\n", self.config.random_seed));
        report.push_str(&format!("- **Number of Runs**: {}\n", self.config.num_runs));
        report.push_str(&format!("- **Max Epochs**: {}\n", self.config.max_epochs));

        report.push_str("\n## Optimizers\n\n");
        // `HashMap` iteration order is arbitrary; sort for a stable report.
        let mut optimizer_names: Vec<&String> = self.optimizer_configs.keys().collect();
        optimizer_names.sort_unstable();
        for name in optimizer_names {
            report.push_str(&format!("- {}\n", name));
        }

        report.push_str("\n## Dataset\n\n");
        report.push_str(&format!("- **Name**: {}\n", self.dataset_info.name));
        report.push_str(&format!(
            "- **Samples**: {}\n",
            self.dataset_info.num_samples
        ));
        report.push_str(&format!(
            "- **Features**: {}\n",
            self.dataset_info.num_features
        ));

        report.push_str("\n## Results\n\n");
        report.push_str(&format!("**Total Runs**: {}\n\n", self.results.len()));

        // Group runs by optimizer. A `BTreeMap` iterates in key order, which
        // keeps the section order deterministic; keys borrow from the results
        // so no names are cloned.
        let mut runs_by_optimizer: std::collections::BTreeMap<&str, Vec<&ExperimentResult>> =
            std::collections::BTreeMap::new();
        for result in &self.results {
            runs_by_optimizer
                .entry(result.optimizer_name.as_str())
                .or_default()
                .push(result);
        }

        for (optimizer, results) in &runs_by_optimizer {
            report.push_str(&format!("### {}\n\n", optimizer));

            // Every group holds at least one run: entries are only created
            // when a result is pushed.
            let successful_runs: Vec<&ExperimentResult> = results
                .iter()
                .copied()
                .filter(|r| r.status == RunStatus::Success)
                .collect();
            report.push_str(&format!(
                "- **Successful Runs**: {}/{}\n",
                successful_runs.len(),
                results.len()
            ));

            if !successful_runs.is_empty() {
                for metric in &self.metrics {
                    if let Some(values) = self.get_metric_values(&successful_runs, metric) {
                        let count = values.len() as f64;
                        let mean = values.iter().sum::<f64>() / count;
                        let variance =
                            values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / count;
                        let std = variance.sqrt();
                        report.push_str(&format!("- **{}**: {:.4} ± {:.4}\n", metric, mean, std));
                    }
                }
            }
            report.push('\n');
        }

        if !self.notes.is_empty() {
            report.push_str("## Notes\n\n");
            for note in &self.notes {
                report.push_str(&format!(
                    "**{}** ({}): {}\n\n",
                    note.author,
                    note.timestamp.format("%Y-%m-%d %H:%M"),
                    note.content
                ));
            }
        }

        report
    }

    /// Collects the value of `metric` from every run in `results` that
    /// reported it, in input order.
    ///
    /// Returns `None` when no run has a final value for `metric`, so callers
    /// never compute statistics over an empty list.
    fn get_metric_values(&self, results: &[&ExperimentResult], metric: &str) -> Option<Vec<f64>> {
        let values: Vec<f64> = results
            .iter()
            .filter_map(|run| run.final_metrics.get(metric).copied())
            .collect();
        if values.is_empty() {
            None
        } else {
            Some(values)
        }
    }
}
impl Default for ExperimentConfig {
fn default() -> Self {
Self {
random_seed: 42,
num_runs: 1,
max_epochs: 100,
early_stopping: None,
hardware_config: HardwareConfig::default(),
environment: HashMap::new(),
validation_split: 0.2,
test_split: 0.1,
cv_folds: None,
}
}
}
impl Default for CpuInfo {
fn default() -> Self {
Self {
model: "Unknown".to_string(),
cores: std::thread::available_parallelism()
.map(|p| p.get())
.unwrap_or(1),
threads: std::thread::available_parallelism()
.map(|p| p.get())
.unwrap_or(1),
frequency_mhz: 0,
cache_sizes: Vec::new(),
simd_capabilities: Vec::new(),
}
}
}
impl Default for MemoryConfig {
fn default() -> Self {
Self {
total_memory_mb: 8192, available_memory_mb: 6144, allocation_strategy: MemoryAllocationStrategy::Standard,
pool_size_mb: None,
}
}
}
impl Default for ParallelConfig {
fn default() -> Self {
Self {
num_threads: std::thread::available_parallelism()
.map(|p| p.get())
.unwrap_or(1),
thread_affinity: None,
work_stealing: true,
chunk_size: None,
}
}
}
impl Default for DatasetInfo {
fn default() -> Self {
Self {
name: "Unknown".to_string(),
description: String::new(),
source: String::new(),
version: "1.0".to_string(),
num_samples: 0,
num_features: 0,
num_classes: None,
data_type: DataType::Tabular,
statistics: DatasetStatistics::default(),
preprocessing: Vec::new(),
}
}
}
impl Default for SystemInfo {
    /// Describes the current system as far as the standard library allows.
    ///
    /// `os` and `architecture` come from `std::env::consts`. `username` is
    /// read from the `USER` environment variable, then from `USERNAME`, and
    /// falls back to `"unknown"` when neither is set (or not valid Unicode).
    /// `os_version`, `hostname` and `timezone` are left empty.
    fn default() -> Self {
        // `USER` is the Unix convention; Windows exposes the login name as
        // `USERNAME`, so consult it before giving up.
        let username = std::env::var("USER")
            .or_else(|_| std::env::var("USERNAME"))
            .unwrap_or_else(|_| "unknown".to_string());
        Self {
            os: std::env::consts::OS.to_string(),
            os_version: String::new(),
            architecture: std::env::consts::ARCH.to_string(),
            hostname: String::new(),
            username,
            timezone: String::new(),
        }
    }
}
impl ResourceMonitor {
    /// Creates a monitor with no recorded samples and the given sampling
    /// interval (stored in `interval_seconds`).
    pub fn new(_intervalseconds: u64) -> Self {
        Self {
            cpu_usage: Vec::new(),
            memory_usage: Vec::new(),
            gpu_memory_usage: Vec::new(),
            interval_seconds: _intervalseconds,
        }
    }

    /// Placeholder for starting sample collection; currently does nothing.
    pub fn start_monitoring(&mut self) {}

    /// Summarizes the samples recorded so far.
    ///
    /// Peaks are the maximum sample and averages the arithmetic mean (integer
    /// division for memory). With no samples, CPU and memory figures are 0.
    /// `peak_gpu_memory_mb` is the largest available GPU reading, or `None`
    /// when no reading was recorded. Elapsed time and energy are not tracked
    /// by this monitor and are reported as `0.0` and `None`.
    pub fn stop_monitoring(&self) -> ResourceUsage {
        let peak_cpu = self.cpu_usage.iter().copied().fold(0.0f64, f64::max);
        let avg_cpu = if self.cpu_usage.is_empty() {
            0.0
        } else {
            self.cpu_usage.iter().sum::<f64>() / self.cpu_usage.len() as f64
        };

        let peak_memory = self.memory_usage.iter().copied().max().unwrap_or(0);
        let avg_memory = if self.memory_usage.is_empty() {
            0
        } else {
            self.memory_usage.iter().sum::<usize>() / self.memory_usage.len()
        };

        // Samples taken while no GPU reading was available are stored as
        // `None`; `flatten` skips them.
        let peak_gpu_memory = self.gpu_memory_usage.iter().flatten().copied().max();

        ResourceUsage {
            peak_cpu_usage: peak_cpu,
            avg_cpu_usage: avg_cpu,
            peak_memory_mb: peak_memory,
            avg_memory_mb: avg_memory,
            peak_gpu_memory_mb: peak_gpu_memory,
            total_time_seconds: 0.0,
            energy_consumption_joules: None,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder methods fill the matching fields and a new experiment starts
    /// in the `Planning` state.
    #[test]
    fn test_experiment_creation() {
        let metric_names = vec!["accuracy".to_string(), "loss".to_string()];
        let built = Experiment::new("Test Experiment")
            .hypothesis("Test hypothesis")
            .description("Test description")
            .metrics(metric_names);

        assert_eq!(built.name, "Test Experiment");
        assert_eq!(built.hypothesis, "Test hypothesis");
        assert_eq!(built.description, "Test description");
        assert_eq!(built.metrics.len(), 2);
        assert_eq!(built.status, ExperimentStatus::Planning);
    }

    /// `start` then `complete` walks Ready -> Running -> Completed and fills
    /// in the timeline along the way.
    #[test]
    fn test_experiment_lifecycle() {
        let mut exp = Experiment::new("Lifecycle Test");
        exp.status = ExperimentStatus::Ready;

        let started = exp.start();
        assert!(started.is_ok());
        assert_eq!(exp.status, ExperimentStatus::Running);
        assert!(exp.timeline.started_at.is_some());

        let completed = exp.complete();
        assert!(completed.is_ok());
        assert_eq!(exp.status, ExperimentStatus::Completed);
        assert!(exp.timeline.completed_at.is_some());
        assert!(exp.timeline.actual_duration.is_some());
    }

    /// Notes are appended in call order and keep their category.
    #[test]
    fn test_experiment_notes() {
        let mut exp = Experiment::new("Notes Test");
        exp.add_note("Researcher", "Initial observation", NoteType::Observation);
        exp.add_note("Researcher", "Found an issue", NoteType::Issue);

        assert_eq!(exp.notes.len(), 2);
        assert_eq!(exp.notes[0].note_type, NoteType::Observation);
        assert_eq!(exp.notes[1].note_type, NoteType::Issue);
    }
}