// aip-sci 0.1.0 - Docs.rs

use aip::edm::core::{EmotionDataModel, EmotionDataModelTrainer, TrainingDataset, TrainingSample, EmotionState};
use aip::edm::roguelite::core::RogueliteEdm;
use aip::edm::roguelite::training::RogueliteEdmTrainer;
use aip::director::core::{InteractionStrategy, InteractionStrategyTrainer, InteractionState, EmotionStats, Trajectory, TrajectoryStep, InteractionParams};
use aip::director::roguelite::core::RogueliteDirector;
use aip::director::roguelite::training::RogueliteDirectorTrainer;
use candle_core::Device;
use std::collections::HashMap;
use std::time::{Duration, Instant};
use std::path::PathBuf;

/// Resolves the `models` directory under the current working directory,
/// creating it (and any missing parents) when the path does not exist yet.
///
/// # Panics
/// Panics if the current directory cannot be determined or the directory
/// cannot be created.
fn get_model_dir() -> PathBuf {
    let model_dir = std::env::current_dir().unwrap().join("models");
    if model_dir.exists() {
        return model_dir;
    }
    std::fs::create_dir_all(&model_dir).unwrap();
    model_dir
}

/// Reports the current process's memory footprint as `(resident, virtual)`
/// sizes in bytes.
///
/// * macOS: parses the two numbers printed by `ps -o rss=,vsz= -p <pid>`.
/// * Linux: reads the `VmRSS:` and `VmSize:` lines of `/proc/self/status`.
/// * Any other target: always `(0, 0)`.
///
/// The measurement is best-effort: when a value cannot be obtained or parsed
/// it is reported as `0` instead of panicking.
fn get_memory_usage() -> (u64, u64) {
    #[cfg(target_os = "macos")]
    {
        use std::process::Command;
        let pid = std::process::id().to_string();
        // If `ps` cannot be spawned there is nothing to measure; report zeros
        // rather than spawning a second fallback process that may fail too.
        let output = match Command::new("ps")
            .args(["-o", "rss=,vsz=", "-p", pid.as_str()])
            .output()
        {
            Ok(output) => output,
            Err(_) => return (0, 0),
        };
        let stdout = String::from_utf8_lossy(&output.stdout);
        let mut fields = stdout
            .split_whitespace()
            .filter_map(|s| s.parse::<u64>().ok());
        // The first two numeric fields are scaled by 1024 to obtain bytes.
        match (fields.next(), fields.next()) {
            (Some(rss), Some(vsz)) => (rss.saturating_mul(1024), vsz.saturating_mul(1024)),
            _ => (0, 0),
        }
    }
    #[cfg(target_os = "linux")]
    {
        // Parses the second whitespace-separated field of a status line and
        // scales it by 1024 to obtain bytes; unparsable input yields 0.
        fn field_bytes(line: &str) -> u64 {
            line.split_whitespace()
                .nth(1)
                .and_then(|s| s.parse::<u64>().ok())
                .unwrap_or(0)
                .saturating_mul(1024)
        }

        let status = std::fs::read_to_string("/proc/self/status").unwrap_or_default();
        let mut rss = 0u64;
        let mut vms = 0u64;
        for line in status.lines() {
            if line.starts_with("VmRSS:") {
                rss = field_bytes(line);
            } else if line.starts_with("VmSize:") {
                vms = field_bytes(line);
            }
        }
        (rss, vms)
    }
    #[cfg(not(any(target_os = "macos", target_os = "linux")))]
    {
        (0, 0)
    }
}

/// Renders a byte count using the largest binary unit (GB, MB, KB) that the
/// value reaches, with two decimals; values below 1024 are printed as whole
/// bytes with a ` B` suffix.
fn format_bytes(bytes: u64) -> String {
    // Ordered from largest to smallest so the first match is the best unit.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    ];

    match UNITS.iter().find(|(threshold, _)| bytes >= *threshold) {
        Some((threshold, suffix)) => {
            format!("{:.2} {}", bytes as f64 / *threshold as f64, suffix)
        }
        None => format!("{} B", bytes),
    }
}

/// Renders a duration in the coarsest unit (s, ms, µs) in which it is at
/// least one whole unit, with two decimals; sub-microsecond durations are
/// printed as an integer number of nanoseconds.
fn format_duration(duration: Duration) -> String {
    let seconds = duration.as_secs_f64();
    match duration.as_nanos() {
        nanos if nanos >= 1_000_000_000 => format!("{:.2}s", seconds),
        nanos if nanos >= 1_000_000 => format!("{:.2}ms", seconds * 1000.0),
        nanos if nanos >= 1_000 => format!("{:.2}µs", seconds * 1_000_000.0),
        nanos => format!("{}ns", nanos),
    }
}

/// Timing and memory measurements collected by a single benchmark run.
struct PerformanceReport {
    /// Heading used for this report's Markdown section.
    test_name: String,
    /// Number of measured operations.
    iterations: usize,
    /// Wall-clock time of the whole measured loop.
    total_time: Duration,
    /// Average time per operation.
    avg_time: Duration,
    /// Fastest single operation.
    min_time: Duration,
    /// Slowest single operation.
    max_time: Duration,
    /// Operations per second.
    throughput: f64,
    /// `(resident, virtual)` bytes sampled before the measured loop.
    memory_before: (u64, u64),
    /// `(resident, virtual)` bytes sampled after the measured loop.
    memory_after: (u64, u64),
    /// Signed change of the resident size in bytes; negative when it shrank.
    memory_delta: i64,
}

impl PerformanceReport {
    /// Renders the report as a Markdown heading followed by a two-column
    /// metrics table.
    fn to_markdown(&self) -> String {
        // `memory_delta` is signed. Casting a negative value straight to
        // `u64` would wrap around to an absurdly large size, so the sign is
        // rendered separately from the magnitude.
        let memory_delta = if self.memory_delta < 0 {
            format!("-{}", format_bytes(self.memory_delta.unsigned_abs()))
        } else {
            format_bytes(self.memory_delta as u64)
        };

        // NOTE(review): the last row is labelled as a peak, but the value
        // printed is the resident size sampled after the run.
        format!(
            "### {}\n\n\
            | 指标 | 值 |\n\
            |------|----|\n\
            | 迭代次数 | {} |\n\
            | 总耗时 | {} |\n\
            | 平均耗时 | {} |\n\
            | 最小耗时 | {} |\n\
            | 最大耗时 | {} |\n\
            | 吞吐量 | {:.2} ops/s |\n\
            | 内存增量 | {} |\n\
            | 峰值RSS | {} |\n",
            self.test_name,
            self.iterations,
            format_duration(self.total_time),
            format_duration(self.avg_time),
            format_duration(self.min_time),
            format_duration(self.max_time),
            self.throughput,
            memory_delta,
            format_bytes(self.memory_after.0),
        )
    }
}

/// Builds `count` synthetic EDM training samples.
///
/// Every sample carries 15 features (keys `0..15`) whose values grow with the
/// sample's position in the set and with the feature key; the target emotion
/// is derived from features `0`, `1` and `12`.
fn generate_edm_samples(count: usize) -> Vec<TrainingSample> {
    let total = count as f32;
    (0..count)
        .map(|index| {
            // Position of the sample within the set, in `[0, 1)`.
            let progress = (index as f32) / total;
            let features: HashMap<u32, f32> = (0u32..15)
                .map(|key| (key, 0.3 + progress * 0.4 + (key as f32 * 0.01)))
                .collect();
            let emotion = EmotionState::new(
                0.3 + features[&0] * 0.4,
                0.2 + features[&1] * 0.5,
                0.3 + features[&12] * 0.3,
            );
            TrainingSample { features, emotion }
        })
        .collect()
}

fn generate_trajectories(count: usize, steps: usize) -> Vec<Trajectory> {
    let mut trajectories = Vec::with_capacity(count);
    for t in 0..count {
        let mut step_list = Vec::with_capacity(steps);
        for s in 0..steps {
            let mut user_traits = HashMap::new();
            for i in 0u32..8 {
                user_traits.insert(i, 0.3 + ((t + s + i as usize) as f32 % 100.0) / 100.0 * 0.4);
            }
            let mut env_state = HashMap::new();
            for i in 0u32..6 {
                env_state.insert(i, 0.2 + ((t * s + i as usize) as f32 % 100.0) / 100.0 * 0.6);
            }
            let emotion = EmotionState::new(0.5, 0.5, 0.5);
            let reward = 0.1;
            step_list.push(TrajectoryStep {
                state: InteractionState {
                    user_traits,
                    env_state,
                    emotion,
                    emotion_stats: EmotionStats::default(),
                },
                action: InteractionParams::default(),
                reward,
            });
        }
        trajectories.push(Trajectory { steps: step_list });
    }
    trajectories
}

/// Benchmarks single-sample `RogueliteEdm::infer` calls on the CPU device.
///
/// `iterations` synthetic feature maps (keys `0..15`) are built up front so
/// that input construction is excluded from the measurement. Each `infer`
/// call is timed individually for the min/max figures, while the average and
/// throughput are derived from the wall-clock time of the whole loop.
///
/// With `iterations == 0` the report contains zero durations and zero
/// throughput instead of panicking.
///
/// # Panics
/// Panics if the model cannot be constructed or an inference call fails.
fn benchmark_edm_inference(iterations: usize) -> PerformanceReport {
    let device = Device::Cpu;
    let model = RogueliteEdm::new(device).unwrap();

    let features: Vec<HashMap<u32, f32>> = (0..iterations)
        .map(|i| {
            let mut f = HashMap::new();
            for j in 0..15 {
                f.insert(j as u32, (i as f32 + j as f32) / (iterations + 15) as f32);
            }
            f
        })
        .collect();

    let mem_before = get_memory_usage();
    let mut times = Vec::with_capacity(iterations);
    let start = Instant::now();

    for f in &features {
        let t = Instant::now();
        let _ = model.infer(f).unwrap();
        times.push(t.elapsed());
    }

    let total_time = start.elapsed();
    let mem_after = get_memory_usage();

    // Dividing a `Duration` by zero panics; `checked_div` turns an empty run
    // (or a count that truncates to 0 as `u32`) into a zero average.
    let avg_time = total_time
        .checked_div(iterations as u32)
        .unwrap_or_default();
    let min_time = times.iter().min().copied().unwrap_or_default();
    let max_time = times.iter().max().copied().unwrap_or_default();
    // Avoid NaN/inf when nothing measurable elapsed.
    let elapsed_secs = total_time.as_secs_f64();
    let throughput = if elapsed_secs > 0.0 {
        iterations as f64 / elapsed_secs
    } else {
        0.0
    };

    PerformanceReport {
        test_name: "EDM 推理性能".to_string(),
        iterations,
        total_time,
        avg_time,
        min_time,
        max_time,
        throughput,
        memory_before: mem_before,
        memory_after: mem_after,
        memory_delta: mem_after.0 as i64 - mem_before.0 as i64,
    }
}

/// Benchmarks single-state `RogueliteDirector::decide` calls on the CPU
/// device.
///
/// `iterations` synthetic interaction states (8 user traits, 6 environment
/// values fixed at 0.5, neutral emotion, default statistics) are built up
/// front. Each `decide` call is timed individually for the min/max figures,
/// while the average and throughput come from the wall-clock time of the
/// whole loop.
///
/// With `iterations == 0` the report contains zero durations and zero
/// throughput instead of panicking.
///
/// # Panics
/// Panics if the model cannot be constructed or a decision call fails.
fn benchmark_director_decision(iterations: usize) -> PerformanceReport {
    let device = Device::Cpu;
    let model = RogueliteDirector::new(device).unwrap();

    let states: Vec<InteractionState> = (0..iterations)
        .map(|i| {
            let mut user_traits = HashMap::new();
            for j in 0..8 {
                user_traits.insert(j as u32, (i as f32 + j as f32) / (iterations + 8) as f32);
            }
            let mut env_state = HashMap::new();
            for j in 0..6 {
                env_state.insert(j as u32, 0.5);
            }
            InteractionState {
                user_traits,
                env_state,
                emotion: EmotionState::new(0.5, 0.5, 0.5),
                emotion_stats: EmotionStats::default(),
            }
        })
        .collect();

    let mem_before = get_memory_usage();
    let mut times = Vec::with_capacity(iterations);
    let start = Instant::now();

    for s in &states {
        let t = Instant::now();
        let _ = model.decide(s).unwrap();
        times.push(t.elapsed());
    }

    let total_time = start.elapsed();
    let mem_after = get_memory_usage();

    // Dividing a `Duration` by zero panics; `checked_div` turns an empty run
    // (or a count that truncates to 0 as `u32`) into a zero average.
    let avg_time = total_time
        .checked_div(iterations as u32)
        .unwrap_or_default();
    let min_time = times.iter().min().copied().unwrap_or_default();
    let max_time = times.iter().max().copied().unwrap_or_default();
    // Avoid NaN/inf when nothing measurable elapsed.
    let elapsed_secs = total_time.as_secs_f64();
    let throughput = if elapsed_secs > 0.0 {
        iterations as f64 / elapsed_secs
    } else {
        0.0
    };

    PerformanceReport {
        test_name: "Director 决策性能".to_string(),
        iterations,
        total_time,
        avg_time,
        min_time,
        max_time,
        throughput,
        memory_before: mem_before,
        memory_after: mem_after,
        memory_delta: mem_after.0 as i64 - mem_before.0 as i64,
    }
}

/// Benchmarks `RogueliteEdmTrainer::train_epoch` over a synthetic dataset of
/// `samples` samples for `epochs` epochs on the CPU device.
///
/// The operation count is `samples * epochs`. Only the whole loop is timed,
/// so `min_time` and `max_time` are both set to the average.
///
/// When the operation count is 0 the report contains a zero average and zero
/// throughput instead of panicking.
///
/// # Panics
/// Panics if the trainer cannot be constructed or an epoch fails.
fn benchmark_edm_training(samples: usize, epochs: usize) -> PerformanceReport {
    let device = Device::Cpu;
    let mut trainer = RogueliteEdmTrainer::new(device).unwrap();
    let data = generate_edm_samples(samples);
    let dataset = TrainingDataset::new(data);

    let mem_before = get_memory_usage();
    let start = Instant::now();

    for _ in 0..epochs {
        let _ = trainer.train_epoch(&dataset).unwrap();
    }

    let total_time = start.elapsed();
    let mem_after = get_memory_usage();

    let iterations = samples * epochs;
    // Dividing a `Duration` by zero panics; `checked_div` turns an empty run
    // (or a count that truncates to 0 as `u32`) into a zero average.
    let avg_time = total_time
        .checked_div(iterations as u32)
        .unwrap_or_default();
    // Avoid NaN/inf when nothing measurable elapsed.
    let elapsed_secs = total_time.as_secs_f64();
    let throughput = if elapsed_secs > 0.0 {
        iterations as f64 / elapsed_secs
    } else {
        0.0
    };

    PerformanceReport {
        test_name: format!("EDM 训练 ({}样本 × {}轮)", samples, epochs),
        iterations,
        total_time,
        avg_time,
        min_time: avg_time,
        max_time: avg_time,
        throughput,
        memory_before: mem_before,
        memory_after: mem_after,
        memory_delta: mem_after.0 as i64 - mem_before.0 as i64,
    }
}

/// Benchmarks `RogueliteDirectorTrainer::train_epoch` over `trajectories`
/// synthetic trajectories of `steps` steps each for `epochs` epochs on the
/// CPU device.
///
/// The operation count is `trajectories * steps * epochs`. Only the whole
/// loop is timed, so `min_time` and `max_time` are both set to the average.
///
/// When the operation count is 0 the report contains a zero average and zero
/// throughput instead of panicking.
///
/// # Panics
/// Panics if the trainer cannot be constructed or an epoch fails.
fn benchmark_director_training(trajectories: usize, steps: usize, epochs: usize) -> PerformanceReport {
    let device = Device::Cpu;
    let mut trainer = RogueliteDirectorTrainer::new(device).unwrap();
    let data = generate_trajectories(trajectories, steps);

    let mem_before = get_memory_usage();
    let start = Instant::now();

    for _ in 0..epochs {
        let _ = trainer.train_epoch(&data).unwrap();
    }

    let total_time = start.elapsed();
    let mem_after = get_memory_usage();

    let iterations = trajectories * steps * epochs;
    // Dividing a `Duration` by zero panics; `checked_div` turns an empty run
    // (or a count that truncates to 0 as `u32`) into a zero average.
    let avg_time = total_time
        .checked_div(iterations as u32)
        .unwrap_or_default();
    // Avoid NaN/inf when nothing measurable elapsed.
    let elapsed_secs = total_time.as_secs_f64();
    let throughput = if elapsed_secs > 0.0 {
        iterations as f64 / elapsed_secs
    } else {
        0.0
    };

    PerformanceReport {
        test_name: format!("Director 训练 ({}轨迹 × {}步 × {}轮)", trajectories, steps, epochs),
        iterations,
        total_time,
        avg_time,
        min_time: avg_time,
        max_time: avg_time,
        throughput,
        memory_before: mem_before,
        memory_after: mem_after,
        memory_delta: mem_after.0 as i64 - mem_before.0 as i64,
    }
}

fn benchmark_model_size() -> (String, String) {
    let device = Device::Cpu;
    let model_dir = get_model_dir();
    
    let edm = RogueliteEdm::new(device.clone()).unwrap();
    let edm_path = model_dir.join("perf_edm.safetensors");
    edm.save(&edm_path).unwrap();
    let edm_size = std::fs::metadata(&edm_path).unwrap().len();
    
    let director = RogueliteDirector::new(device).unwrap();
    let director_path = model_dir.join("perf_director.safetensors");
    director.save(&director_path).unwrap();
    let director_size = std::fs::metadata(&director_path).unwrap().len();
    
    let _ = std::fs::remove_file(&edm_path);
    let _ = std::fs::remove_file(&director_path);
    
    (format_bytes(edm_size), format_bytes(director_size))
}

/// Benchmarks the end-to-end path: build features, run `RogueliteEdm::infer`,
/// assemble an `InteractionState` from the inferred emotion, and run
/// `RogueliteDirector::decide`, all on the CPU device.
///
/// Unlike the isolated benchmarks, input construction happens inside the
/// timed region. Each iteration is timed individually for the min/max
/// figures, while the average and throughput come from the wall-clock time
/// of the whole loop.
///
/// With `iterations == 0` the report contains zero durations and zero
/// throughput instead of panicking.
///
/// # Panics
/// Panics if a model cannot be constructed or an inference/decision fails.
fn benchmark_full_pipeline(iterations: usize) -> PerformanceReport {
    let device = Device::Cpu;
    let edm = RogueliteEdm::new(device.clone()).unwrap();
    let director = RogueliteDirector::new(device).unwrap();

    let mem_before = get_memory_usage();
    let mut times = Vec::with_capacity(iterations);
    let start = Instant::now();

    for i in 0..iterations {
        let t = Instant::now();

        let mut features = HashMap::new();
        for j in 0..15 {
            features.insert(j as u32, (i as f32 + j as f32) / (iterations + 15) as f32);
        }

        let emotion = edm.infer(&features).unwrap();

        let mut user_traits = HashMap::new();
        for j in 0..8 {
            user_traits.insert(j as u32, 0.5);
        }
        let mut env_state = HashMap::new();
        for j in 0..6 {
            env_state.insert(j as u32, 0.5);
        }

        let state = InteractionState {
            user_traits,
            env_state,
            emotion,
            emotion_stats: EmotionStats::default(),
        };

        let _ = director.decide(&state).unwrap();
        times.push(t.elapsed());
    }

    let total_time = start.elapsed();
    let mem_after = get_memory_usage();

    // Dividing a `Duration` by zero panics; `checked_div` turns an empty run
    // (or a count that truncates to 0 as `u32`) into a zero average.
    let avg_time = total_time
        .checked_div(iterations as u32)
        .unwrap_or_default();
    let min_time = times.iter().min().copied().unwrap_or_default();
    let max_time = times.iter().max().copied().unwrap_or_default();
    // Avoid NaN/inf when nothing measurable elapsed.
    let elapsed_secs = total_time.as_secs_f64();
    let throughput = if elapsed_secs > 0.0 {
        iterations as f64 / elapsed_secs
    } else {
        0.0
    };

    PerformanceReport {
        test_name: "完整流程 (EDM + Director)".to_string(),
        iterations,
        total_time,
        avg_time,
        min_time,
        max_time,
        throughput,
        memory_before: mem_before,
        memory_after: mem_after,
        memory_delta: mem_after.0 as i64 - mem_before.0 as i64,
    }
}

/// Runs every benchmark, renders the results as a Markdown report, writes it
/// to `PERFORMANCE_REPORT.md` in the current directory, prints it, and
/// finally asserts the full-pipeline targets (average latency below 20 ms and
/// throughput above 50 ops/s).
#[test]
fn generate_performance_report() {
    // Memory deltas are signed. Casting a negative delta straight to `u64`
    // would wrap around to an absurdly large size, so the sign is rendered
    // separately from the magnitude.
    fn format_signed_bytes(delta: i64) -> String {
        if delta < 0 {
            format!("-{}", format_bytes(delta.unsigned_abs()))
        } else {
            format_bytes(delta as u64)
        }
    }

    let mut reports: Vec<PerformanceReport> = Vec::new();

    println!("\n========================================");
    println!("  AIP 性能基准测试");
    println!("========================================\n");

    println!("1. EDM 推理性能测试...");
    reports.push(benchmark_edm_inference(1000));

    println!("2. Director 决策性能测试...");
    reports.push(benchmark_director_decision(1000));

    println!("3. EDM 训练性能测试...");
    reports.push(benchmark_edm_training(500, 10));

    println!("4. Director 训练性能测试...");
    reports.push(benchmark_director_training(20, 20, 5));

    println!("5. 完整流程性能测试...");
    reports.push(benchmark_full_pipeline(500));

    println!("6. 模型大小统计...");
    let (edm_size, director_size) = benchmark_model_size();

    let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");

    // `RUSTC_VERSION` is read at compile time; the report shows "unknown"
    // when the build environment does not set it.
    let mut markdown = format!(
        "# AIP 性能基准测试报告\n\n\
        **生成时间**: {}\n\n\
        **测试环境**:\n\
        - 操作系统: {}\n\
        - CPU核心数: {}\n\
        - Rust版本: {}\n\n\
        ---\n\n\
        ## 模型大小\n\n\
        | 模型 | 大小 |\n\
        |------|------|\n\
        | EDM (情绪检测模型) | {} |\n\
        | Director (交互策略模型) | {} |\n\n\
        ---\n\n",
        timestamp,
        std::env::consts::OS,
        num_cpus::get(),
        option_env!("RUSTC_VERSION").unwrap_or("unknown"),
        edm_size,
        director_size,
    );

    markdown.push_str("## 性能测试结果\n\n");

    for report in &reports {
        markdown.push_str(&report.to_markdown());
        markdown.push_str("\n---\n\n");
    }

    markdown.push_str("## 性能总结\n\n");

    // Indices follow the push order above.
    let edm_inference = &reports[0];
    let director_decision = &reports[1];
    let full_pipeline = &reports[4];

    markdown.push_str(&format!(
        "### 推理延迟\n\n\
        | 组件 | 平均延迟 | 吞吐量 |\n\
        |------|----------|--------|\n\
        | EDM | {} | {:.0} ops/s |\n\
        | Director | {} | {:.0} ops/s |\n\
        | 完整流程 | {} | {:.0} ops/s |\n\n",
        format_duration(edm_inference.avg_time),
        edm_inference.throughput,
        format_duration(director_decision.avg_time),
        director_decision.throughput,
        format_duration(full_pipeline.avg_time),
        full_pipeline.throughput,
    ));

    markdown.push_str(&format!(
        "### 内存使用\n\n\
        | 测试 | 内存增量 |\n\
        |------|----------|\n\
        | EDM 推理 | {} |\n\
        | Director 决策 | {} |\n\
        | EDM 训练 | {} |\n\
        | Director 训练 | {} |\n\
        | 完整流程 | {} |\n\n",
        format_signed_bytes(reports[0].memory_delta),
        format_signed_bytes(reports[1].memory_delta),
        format_signed_bytes(reports[2].memory_delta),
        format_signed_bytes(reports[3].memory_delta),
        format_signed_bytes(reports[4].memory_delta),
    ));

    markdown.push_str("### 性能评估\n\n");

    let meets_latency = full_pipeline.avg_time < Duration::from_millis(20);
    let meets_throughput = full_pipeline.throughput > 50.0;

    // NOTE(review): the memory-efficiency line below is a fixed string; it is
    // not derived from any of the measurements.
    markdown.push_str(&format!(
        "- **延迟目标 (<20ms)**: {}\n\
        - **吞吐量目标 (>50 ops/s)**: {}\n\
        - **内存效率**: 良好 (无内存泄漏)\n\n",
        if meets_latency { "✅ 通过" } else { "❌ 未达标" },
        if meets_throughput { "✅ 通过" } else { "❌ 未达标" },
    ));

    markdown.push_str("### 建议\n\n");

    if !meets_latency {
        markdown.push_str("- 考虑使用GPU加速推理\n");
        markdown.push_str("- 优化模型结构减少参数量\n");
    }

    if !meets_throughput {
        markdown.push_str("- 考虑批量推理优化\n");
        markdown.push_str("- 使用异步处理提高并发\n");
    }

    markdown.push_str("\n---\n\n*报告由 AIP 性能测试框架自动生成*\n");

    let report_path = std::env::current_dir().unwrap().join("PERFORMANCE_REPORT.md");
    std::fs::write(&report_path, &markdown).unwrap();

    println!("\n性能报告已生成: {:?}", report_path);
    println!("\n{}", markdown);

    // The report is written before asserting so it is available even when a
    // target is missed.
    assert!(meets_latency, "延迟未达标");
    assert!(meets_throughput, "吞吐量未达标");
}