use crate::infrastructure::profiling::{BenchmarkResult, InferenceProfile, PhaseStats};
use serde::{Deserialize, Serialize};
/// Request payload describing a profiling-data export.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportRequest {
    /// Output format requested for the export.
    pub format: ExportFormat,
    /// Optional time range, in minutes. How `None` is interpreted is decided
    /// by the consumer of this request and is not defined in this file.
    pub time_range_minutes: Option<u32>,
}
/// File formats that an export can be requested in.
///
/// Variant names are part of the serialized representation produced by the
/// serde derives, so they are kept exactly as declared.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExportFormat {
    /// Comma-separated values.
    CSV,
    /// JSON document.
    JSON,
    /// PDF document.
    PDF,
}
/// Response carrying a list of inference profiles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecentProfilesResponse {
    /// The profiles being returned.
    pub profiles: Vec<InferenceProfile>,
    /// Profile count reported alongside the list.
    /// NOTE(review): presumably equals `profiles.len()` - confirm at the call site.
    pub count: u32,
    /// Timestamp attached to the response (epoch and unit are not specified in this file).
    pub timestamp: u64,
}
/// Response carrying per-phase statistics for some window of samples.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfileStatsResponse {
    /// Statistics entries, one [`PhaseStats`] per element.
    pub stats: Vec<PhaseStats>,
    /// Free-form description of the window the statistics cover
    /// (the string format is not defined in this file).
    pub window: String,
    /// Number of samples reported for the statistics.
    pub sample_count: u32,
}
/// Response carrying a series of timeline samples.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimelineResponse {
    /// The individual samples making up the timeline.
    pub data_points: Vec<TimelinePoint>,
    /// Size of the timeline window, in minutes.
    pub window_minutes: u32,
}
/// A single sample on a performance timeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimelinePoint {
    /// Timestamp of the sample (epoch and unit are not specified in this file).
    pub timestamp: u64,
    /// Throughput, in tokens per second.
    pub throughput_tokens_per_sec: f32,
    /// Average latency, in milliseconds.
    pub avg_latency_ms: f32,
    /// GPU memory figure, in megabytes.
    pub gpu_memory_mb: u32,
}
/// Response carrying a list of reported anomalies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomaliesResponse {
    /// The anomalies being reported.
    pub anomalies: Vec<AnomalyInfo>,
    /// Timestamp attached to the response (epoch and unit are not specified in this file).
    pub timestamp: u64,
}
/// Description of one anomaly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyInfo {
    /// Name of the metric the anomaly concerns.
    pub metric: String,
    /// Severity assigned to the anomaly.
    pub severity: AnomalySeverity,
    /// Human-readable message describing the anomaly.
    pub message: String,
    /// Deviation expressed as a percentage.
    /// NOTE(review): the baseline it is measured against is not shown here.
    pub deviation_percent: f32,
}
/// Severity levels that can be attached to an anomaly.
///
/// The type does not derive any ordering trait, so the declaration order
/// below carries no comparison semantics in code.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AnomalySeverity {
    /// Low severity.
    Low,
    /// Medium severity.
    Medium,
    /// High severity.
    High,
    /// Critical severity.
    Critical,
}
/// Response comparing a current benchmark result against a baseline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonResponse {
    /// The baseline benchmark result.
    pub baseline: BenchmarkResult,
    /// The benchmark result being compared to the baseline.
    pub current: BenchmarkResult,
    /// Throughput change, as a percentage.
    /// NOTE(review): sign convention is set by the producer - confirm.
    pub throughput_delta_percent: f32,
    /// Latency change, as a percentage.
    pub latency_delta_percent: f32,
    /// Memory change, as a percentage.
    pub memory_delta_percent: f32,
    /// Overall verdict for the comparison.
    pub status: ComparisonStatus,
}
/// Possible verdicts of a benchmark comparison.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComparisonStatus {
    /// The comparison is classified as a regression.
    Regression,
    /// The comparison is classified as an improvement.
    Improvement,
    /// The comparison is classified as unchanged.
    NoChange,
}
/// Response describing a produced export.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportResponse {
    /// Export format, carried as a plain string here
    /// (not as an [`ExportFormat`] value).
    pub format: String,
    /// Size of the exported file, in bytes.
    pub file_size_bytes: u64,
    /// Number of records reported for the export.
    pub record_count: u32,
    /// URL from which the export can be downloaded.
    pub download_url: String,
}
/// Response carrying headline performance gauge values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceGaugeResponse {
    /// Throughput, in tokens per second.
    pub throughput_tokens_per_sec: f32,
    /// Average latency, in milliseconds.
    pub avg_latency_ms: f32,
    /// 99th-percentile latency, in milliseconds.
    pub p99_latency_ms: f32,
    /// GPU utilization, as a percentage.
    pub gpu_utilization_percent: f32,
    /// Status classification reported with the gauge values.
    pub status: GaugeStatus,
}
/// Status classification for a performance gauge.
///
/// Derives `PartialEq` so values can be compared directly (the unit tests in
/// this file rely on that via `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum GaugeStatus {
    /// Best classification.
    Excellent,
    /// Second-best classification.
    Good,
    /// Classification signalling a warning.
    Warning,
    /// Worst classification.
    Critical,
}
impl GaugeStatus {
pub fn from_metrics(p99_latency: f32, throughput: f32) -> Self {
match (p99_latency, throughput) {
(lat, _) if lat < 50.0 => GaugeStatus::Excellent,
(lat, _) if lat < 150.0 => GaugeStatus::Good,
(lat, _) if lat < 500.0 => GaugeStatus::Warning,
_ => GaugeStatus::Critical,
}
}
}
/// Response breaking a total duration down into named phases.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PhaseBreakdownResponse {
    /// One entry per phase.
    pub phases: Vec<PhaseBreakdownItem>,
    /// Total time, in milliseconds.
    pub total_time_ms: f32,
}
/// A single phase entry within a phase breakdown.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PhaseBreakdownItem {
    /// Name of the phase.
    pub phase: String,
    /// Duration of the phase, in milliseconds.
    pub duration_ms: f32,
    /// Percentage figure for this phase.
    /// NOTE(review): presumably its share of the total time - confirm.
    pub percentage: f32,
}
/// Response carrying a latency histogram and percentile summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyHistogramResponse {
    /// The histogram buckets.
    pub buckets: Vec<HistogramBucket>,
    /// 50th-percentile value (unit not stated here; presumably milliseconds - confirm).
    pub p50: f32,
    /// 95th-percentile value (same unit caveat as `p50`).
    pub p95: f32,
    /// 99th-percentile value (same unit caveat as `p50`).
    pub p99: f32,
}
/// One bucket of a histogram.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramBucket {
    /// Textual description of the bucket's range in milliseconds
    /// (the string format is not defined in this file).
    pub range_ms: String,
    /// Number of entries counted in this bucket.
    pub count: u32,
    /// Percentage figure for this bucket.
    /// NOTE(review): presumably its share of all entries - confirm.
    pub percentage: f32,
}
/// Response carrying metrics for several models side by side.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelComparisonResponse {
    /// One metrics entry per model.
    pub models: Vec<ModelMetrics>,
}
/// Aggregated metrics for a single model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelMetrics {
    /// Identifier of the model.
    pub model_id: String,
    /// Average throughput, in tokens per second.
    pub avg_throughput_tokens_per_sec: f32,
    /// Average latency, in milliseconds.
    pub avg_latency_ms: f32,
    /// Efficiency score (how it is computed is not defined in this file).
    pub efficiency_score: f32,
    /// Number of inferences reported for the model.
    pub inference_count: u32,
}
/// Response summarising the health of the profiling subsystem.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfilingHealthResponse {
    /// Overall health classification.
    pub status: HealthStatus,
    /// Number of profiles collected.
    pub profiles_collected: u32,
    /// Number of anomalies detected.
    pub anomalies_detected: u32,
    /// Timestamp of the last profile (epoch and unit are not specified in this file).
    pub last_profile_timestamp: u64,
}
/// Health classifications for the profiling subsystem.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HealthStatus {
    /// Reported as healthy.
    Healthy,
    /// Reported as degraded.
    Degraded,
    /// Reported as being in an error state.
    Error,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one phase entry for the breakdown fixture.
    fn phase_item(phase: &str, duration_ms: f32, percentage: f32) -> PhaseBreakdownItem {
        PhaseBreakdownItem {
            phase: phase.to_string(),
            duration_ms,
            percentage,
        }
    }

    /// Builds one model entry for the comparison fixture.
    fn model_metrics(
        model_id: &str,
        avg_throughput_tokens_per_sec: f32,
        avg_latency_ms: f32,
        efficiency_score: f32,
        inference_count: u32,
    ) -> ModelMetrics {
        ModelMetrics {
            model_id: model_id.to_string(),
            avg_throughput_tokens_per_sec,
            avg_latency_ms,
            efficiency_score,
            inference_count,
        }
    }

    /// One representative latency per status band maps to the expected status.
    #[test]
    fn test_gauge_status_determination() {
        // (p99 latency, throughput, expected status)
        let cases = [
            (30.0_f32, 100.0_f32, GaugeStatus::Excellent),
            (100.0, 100.0, GaugeStatus::Good),
            (300.0, 50.0, GaugeStatus::Warning),
            (600.0, 10.0, GaugeStatus::Critical),
        ];

        for (p99_latency, throughput, expected) in &cases {
            assert_eq!(GaugeStatus::from_metrics(*p99_latency, *throughput), *expected);
        }
    }

    /// A three-phase breakdown keeps its phases and total time as constructed.
    #[test]
    fn test_phase_breakdown() {
        let phases = vec![
            phase_item("tokenization", 10.0, 1.2),
            phase_item("inference", 800.0, 97.5),
            phase_item("detokenization", 10.0, 1.2),
        ];
        let response = PhaseBreakdownResponse {
            phases,
            total_time_ms: 820.0,
        };

        assert_eq!(response.phases.len(), 3);
        assert!((response.total_time_ms - 820.0).abs() < 0.1);
    }

    /// A comparison built from two models reports both of them.
    #[test]
    fn test_model_comparison() {
        let models = vec![
            model_metrics("llama-7b", 100.0, 100.0, 0.025, 50),
            model_metrics("mistral-7b", 110.0, 90.0, 0.027, 60),
        ];
        let response = ModelComparisonResponse { models };

        assert_eq!(response.models.len(), 2);
    }
}