use serde::{Deserialize, Serialize};
/// One evaluated training configuration together with its resource usage and
/// resulting model quality.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostPerformancePoint {
/// Human-readable label of the configuration (e.g. "LoRA r=64").
pub name: String,
/// GPU time consumed, in hours.
pub gpu_hours: f64,
/// Cost in US dollars; the bundled samples derive it as
/// `gpu_hours * CostModel::cost_per_hour`.
pub cost_usd: f64,
/// Accuracy as a fraction; `to_table` multiplies it by 100 to print a percentage.
pub accuracy: f64,
/// Loss value; `Constraints::max_loss` treats it as lower-is-better.
pub loss: f64,
/// Memory requirement in gigabytes (presumably peak GPU memory; confirm with producers).
pub memory_gb: f64,
/// Whether the point is on the cost/accuracy Pareto frontier; recomputed by
/// `CostPerformanceAnalysis::from_points`.
pub is_pareto_optimal: bool,
/// Hyperparameters associated with this point.
pub config: ConfigParams,
}
/// Optional hyperparameters attached to a `CostPerformancePoint`.
///
/// Every field is an `Option`, so the derived `Default` leaves all of them `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ConfigParams {
/// LoRA adapter rank (the "LoRA r=64" sample stores `Some(64)`).
pub lora_rank: Option<u32>,
/// Quantization bit width (the "QLoRA 4-bit r=64" sample stores `Some(4)`).
pub quant_bits: Option<u8>,
/// Distillation temperature (the "Distillation T=4" sample stores `Some(4.0)`).
pub temperature: Option<f32>,
/// Weight set only on the distillation samples; presumably the distillation-loss
/// mixing factor (TODO confirm).
pub alpha: Option<f32>,
/// Training batch size.
pub batch_size: Option<usize>,
/// Optimizer learning rate.
pub learning_rate: Option<f64>,
}
/// Pricing and capacity description of a single GPU type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostModel {
/// Display name of the GPU, e.g. "A100-80GB".
pub gpu_type: String,
/// Price of one GPU hour; `generate_sample_points` multiplies it by GPU hours
/// to obtain `cost_usd`.
pub cost_per_hour: f64,
/// Device memory in gigabytes.
pub memory_gb: f64,
/// Relative performance factor; the presets use 1.0 for the A100-80GB.
/// NOTE(review): not read by the code visible here; confirm how consumers apply it.
pub performance_factor: f64,
}
impl CostModel {
    /// Preset for an A100 with 80 GB of memory (the reference GPU, factor 1.0).
    pub fn a100_80gb() -> Self {
        Self::custom("A100-80GB", 2.21, 80.0)
    }

    /// Preset for an A100 with 40 GB of memory.
    pub fn a100_40gb() -> Self {
        Self {
            performance_factor: 0.9,
            ..Self::custom("A100-40GB", 1.10, 40.0)
        }
    }

    /// Preset for a V100 with 16 GB of memory.
    pub fn v100() -> Self {
        Self {
            performance_factor: 0.5,
            ..Self::custom("V100", 0.90, 16.0)
        }
    }

    /// Preset for a T4 with 16 GB of memory.
    pub fn t4() -> Self {
        Self {
            performance_factor: 0.25,
            ..Self::custom("T4", 0.35, 16.0)
        }
    }

    /// Builds a model for an arbitrary GPU.
    ///
    /// The performance factor is fixed at 1.0; callers that need another value
    /// can overwrite the public `performance_factor` field afterwards.
    pub fn custom(gpu_type: &str, cost_per_hour: f64, memory_gb: f64) -> Self {
        let gpu_type = gpu_type.to_string();
        Self {
            gpu_type,
            cost_per_hour,
            memory_gb,
            performance_factor: 1.0,
        }
    }
}
/// Optional limits a `CostPerformancePoint` must respect.
///
/// A `None` field leaves that dimension unconstrained; the derived `Default`
/// therefore accepts every point.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Constraints {
/// Inclusive upper bound on `gpu_hours`.
pub max_gpu_hours: Option<f64>,
/// Inclusive upper bound on `cost_usd`.
pub max_cost_usd: Option<f64>,
/// Inclusive lower bound on `accuracy`.
pub min_accuracy: Option<f64>,
/// Inclusive upper bound on `memory_gb`.
pub max_memory_gb: Option<f64>,
/// Inclusive upper bound on `loss`.
pub max_loss: Option<f64>,
}
impl Constraints {
    /// Creates a constraint set with no limits; every point satisfies it.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the inclusive upper bound on GPU hours.
    pub fn with_max_gpu_hours(mut self, hours: f64) -> Self {
        self.max_gpu_hours = Some(hours);
        self
    }

    /// Sets the inclusive upper bound on cost in US dollars.
    pub fn with_max_cost(mut self, cost: f64) -> Self {
        self.max_cost_usd = Some(cost);
        self
    }

    /// Sets the inclusive lower bound on accuracy.
    pub fn with_min_accuracy(mut self, accuracy: f64) -> Self {
        self.min_accuracy = Some(accuracy);
        self
    }

    /// Sets the inclusive upper bound on memory in gigabytes.
    pub fn with_max_memory(mut self, memory_gb: f64) -> Self {
        self.max_memory_gb = Some(memory_gb);
        self
    }

    /// Sets the inclusive upper bound on loss.
    ///
    /// Completes the builder API: `max_loss` is checked by [`Self::is_satisfied`]
    /// like every other limit, so it gets the same fluent setter.
    pub fn with_max_loss(mut self, loss: f64) -> Self {
        self.max_loss = Some(loss);
        self
    }

    /// Returns `true` when `point` respects every configured limit.
    ///
    /// Limits left as `None` are ignored. A metric that is NaN fails any limit
    /// configured for it: plain `>` / `<` comparisons are always false for NaN,
    /// which would otherwise let a diverged run slip through every bound.
    pub fn is_satisfied(&self, point: &CostPerformancePoint) -> bool {
        // Upper bound check: unset cap always passes; NaN never does.
        let within_cap = |value: f64, cap: Option<f64>| match cap {
            Some(cap) => !(value.is_nan() || value > cap),
            None => true,
        };
        // Lower bound check with the same NaN policy.
        let meets_floor = |value: f64, floor: Option<f64>| match floor {
            Some(floor) => !(value.is_nan() || value < floor),
            None => true,
        };

        within_cap(point.gpu_hours, self.max_gpu_hours)
            && within_cap(point.cost_usd, self.max_cost_usd)
            && meets_floor(point.accuracy, self.min_accuracy)
            && within_cap(point.memory_gb, self.max_memory_gb)
            && within_cap(point.loss, self.max_loss)
    }
}
/// Result of analysing a set of cost/performance points.
///
/// Built by `CostPerformanceAnalysis::from_points`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostPerformanceAnalysis {
/// All analysed points, with `is_pareto_optimal` filled in by `from_points`.
pub points: Vec<CostPerformancePoint>,
/// Points not dominated on (cost, accuracy), sorted by ascending cost.
pub pareto_frontier: Vec<CostPerformancePoint>,
/// Point with the highest accuracy; `None` when there are no points.
pub best_accuracy: Option<CostPerformancePoint>,
/// Point with the highest accuracy per dollar among points with positive cost.
pub best_efficiency: Option<CostPerformancePoint>,
/// Point with the lowest cost; `None` when there are no points.
pub lowest_cost: Option<CostPerformancePoint>,
}
impl CostPerformanceAnalysis {
    /// Tolerance used when matching a point against a frontier member by value.
    const MATCH_EPSILON: f64 = 1e-6;

    /// Orders two points by accuracy; incomparable (NaN) pairs count as equal.
    fn cmp_accuracy(a: &CostPerformancePoint, b: &CostPerformancePoint) -> std::cmp::Ordering {
        a.accuracy
            .partial_cmp(&b.accuracy)
            .unwrap_or(std::cmp::Ordering::Equal)
    }

    /// Orders two points by accuracy per dollar; incomparable pairs count as equal.
    fn cmp_efficiency(a: &CostPerformancePoint, b: &CostPerformancePoint) -> std::cmp::Ordering {
        let eff_a = a.accuracy / a.cost_usd;
        let eff_b = b.accuracy / b.cost_usd;
        eff_a
            .partial_cmp(&eff_b)
            .unwrap_or(std::cmp::Ordering::Equal)
    }

    /// Orders two points by cost; incomparable (NaN) pairs count as equal.
    fn cmp_cost(a: &CostPerformancePoint, b: &CostPerformancePoint) -> std::cmp::Ordering {
        a.cost_usd
            .partial_cmp(&b.cost_usd)
            .unwrap_or(std::cmp::Ordering::Equal)
    }

    /// Analyses `points`: computes the Pareto frontier, flags the points that
    /// lie on it, and picks the most accurate, most efficient and cheapest points.
    ///
    /// Any `is_pareto_optimal` value already present on the inputs is overwritten.
    pub fn from_points(mut points: Vec<CostPerformancePoint>) -> Self {
        let mut pareto_frontier = compute_pareto_frontier(&points);

        // The frontier holds clones taken before any flag was computed, so
        // without this its members would all report `is_pareto_optimal == false`.
        for member in &mut pareto_frontier {
            member.is_pareto_optimal = true;
        }

        // A point is flagged when its (cost, accuracy) pair matches a frontier member.
        for point in &mut points {
            point.is_pareto_optimal = pareto_frontier.iter().any(|p| {
                (p.cost_usd - point.cost_usd).abs() < Self::MATCH_EPSILON
                    && (p.accuracy - point.accuracy).abs() < Self::MATCH_EPSILON
            });
        }

        let best_accuracy = points
            .iter()
            .max_by(|a, b| Self::cmp_accuracy(a, b))
            .cloned();
        // Free or negative-cost points are skipped so the ratio stays meaningful.
        let best_efficiency = points
            .iter()
            .filter(|p| p.cost_usd > 0.0)
            .max_by(|a, b| Self::cmp_efficiency(a, b))
            .cloned();
        let lowest_cost = points.iter().min_by(|a, b| Self::cmp_cost(a, b)).cloned();

        Self {
            points,
            pareto_frontier,
            best_accuracy,
            best_efficiency,
            lowest_cost,
        }
    }

    /// Suggests configurations that satisfy `constraints`.
    ///
    /// The result lists, in order and without repeating a name: the most
    /// accurate valid point, the valid point with the best accuracy per dollar,
    /// and every frontier member that satisfies the constraints. It is empty
    /// when no point in `points` satisfies the constraints.
    pub fn recommend(&self, constraints: &Constraints) -> Vec<Recommendation> {
        let mut recommendations: Vec<Recommendation> = Vec::new();
        let valid_points: Vec<&CostPerformancePoint> = self
            .points
            .iter()
            .filter(|p| constraints.is_satisfied(p))
            .collect();
        if valid_points.is_empty() {
            return recommendations;
        }

        if let Some(best_acc) = valid_points
            .iter()
            .copied()
            .max_by(|a, b| Self::cmp_accuracy(a, b))
        {
            recommendations.push(Recommendation {
                reason: "Best accuracy within constraints".to_string(),
                point: best_acc.clone(),
            });
        }

        if let Some(best_eff) = valid_points
            .iter()
            .copied()
            .filter(|p| p.cost_usd > 0.0)
            .max_by(|a, b| Self::cmp_efficiency(a, b))
        {
            // Skip when the same configuration was already recommended above.
            if recommendations
                .iter()
                .all(|r| r.point.name != best_eff.name)
            {
                recommendations.push(Recommendation {
                    reason: "Best accuracy per dollar within constraints".to_string(),
                    point: best_eff.clone(),
                });
            }
        }

        for point in &self.pareto_frontier {
            if constraints.is_satisfied(point)
                && recommendations.iter().all(|r| r.point.name != point.name)
            {
                recommendations.push(Recommendation {
                    reason: "Pareto-optimal configuration".to_string(),
                    point: point.clone(),
                });
            }
        }
        recommendations
    }

    /// Renders all points as a box-drawing table, one row per point, followed
    /// by a legend for the Pareto marker.
    ///
    /// Header and cell widths come from format specifiers so that every row
    /// lines up with the fixed-width borders (24/11/11/10/9/9 columns).
    pub fn to_table(&self) -> String {
        let mut table = String::new();
        table.push_str("Cost-Performance Analysis\n");
        table.push_str(
            "┌────────────────────────┬───────────┬───────────┬──────────┬─────────┬─────────┐\n",
        );
        table.push_str(&format!(
            "│ {:<22} │ {:>9} │ {:>10}│ {:>8} │ {:^7} │ {:^7} │\n",
            "Configuration", "GPU Hours", "Cost (USD)", "Accuracy", "Loss", "Pareto?"
        ));
        table.push_str(
            "├────────────────────────┼───────────┼───────────┼──────────┼─────────┼─────────┤\n",
        );
        for point in &self.points {
            let pareto_mark = if point.is_pareto_optimal { "★" } else { " " };
            table.push_str(&format!(
                "│ {:22} │ {:>9.2} │ {:>9.2} │ {:>7.1}% │ {:>7.4} │ {:^7} │\n",
                truncate(&point.name, 22),
                point.gpu_hours,
                point.cost_usd,
                point.accuracy * 100.0,
                point.loss,
                pareto_mark
            ));
        }
        table.push_str(
            "└────────────────────────┴───────────┴───────────┴──────────┴─────────┴─────────┘\n",
        );
        table.push_str(
            "\n★ = Pareto-optimal (no configuration is both cheaper AND more accurate)\n",
        );
        table
    }
}
/// A suggested configuration together with the reason it was selected.
///
/// Produced by `CostPerformanceAnalysis::recommend`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Recommendation {
/// Short human-readable justification, e.g. "Best accuracy within constraints".
pub reason: String,
/// Copy of the recommended point.
pub point: CostPerformancePoint,
}
/// Returns clones of the points that no other point dominates on
/// (cost, accuracy), ordered by ascending cost.
///
/// A rival dominates a candidate when it is no more expensive and no less
/// accurate, and strictly better on at least one of the two.
fn compute_pareto_frontier(points: &[CostPerformancePoint]) -> Vec<CostPerformancePoint> {
    /// True when `rival` is at least as good as `candidate` on both axes and
    /// strictly better on one.
    fn dominates(rival: &CostPerformancePoint, candidate: &CostPerformancePoint) -> bool {
        let no_worse =
            rival.cost_usd <= candidate.cost_usd && rival.accuracy >= candidate.accuracy;
        let strictly_better =
            rival.cost_usd < candidate.cost_usd || rival.accuracy > candidate.accuracy;
        no_worse && strictly_better
    }

    let mut survivors: Vec<CostPerformancePoint> = points
        .iter()
        .filter(|candidate| !points.iter().any(|rival| dominates(rival, candidate)))
        .cloned()
        .collect();

    // Stable sort keeps input order among equally priced points.
    survivors.sort_by(|lhs, rhs| {
        lhs.cost_usd
            .partial_cmp(&rhs.cost_usd)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    survivors
}
/// Fits `s` into exactly `max_len` characters for table output.
///
/// Strings of at most `max_len` characters are right-padded with spaces.
/// Longer strings are cut and end in `...`. Widths too small to hold the
/// ellipsis (`max_len < 3`) keep just the first `max_len` characters.
///
/// Lengths are measured in `char`s rather than bytes: slicing by byte index
/// panics when the cut lands inside a multi-byte character, and the padding
/// applied by `format!` counts characters as well.
fn truncate(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    if s.chars().count() <= max_len {
        return format!("{s:max_len$}");
    }
    if max_len < ELLIPSIS.len() {
        // No room for the ellipsis; avoid the `max_len - 3` underflow.
        return s.chars().take(max_len).collect();
    }

    let mut shortened: String = s.chars().take(max_len - ELLIPSIS.len()).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}
pub fn generate_sample_points(cost_model: &CostModel) -> Vec<CostPerformancePoint> {
vec![
CostPerformancePoint {
name: "Full Fine-Tuning (7B)".to_string(),
gpu_hours: 120.0,
cost_usd: 120.0 * cost_model.cost_per_hour,
accuracy: 0.92,
loss: 0.25,
memory_gb: 56.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: None,
quant_bits: Some(16),
batch_size: Some(8),
learning_rate: Some(5e-5),
..Default::default()
},
},
CostPerformancePoint {
name: "LoRA r=64".to_string(),
gpu_hours: 24.0,
cost_usd: 24.0 * cost_model.cost_per_hour,
accuracy: 0.89,
loss: 0.30,
memory_gb: 28.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: Some(64),
quant_bits: Some(16),
batch_size: Some(16),
learning_rate: Some(2e-4),
..Default::default()
},
},
CostPerformancePoint {
name: "LoRA r=32".to_string(),
gpu_hours: 18.0,
cost_usd: 18.0 * cost_model.cost_per_hour,
accuracy: 0.87,
loss: 0.33,
memory_gb: 24.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: Some(32),
quant_bits: Some(16),
batch_size: Some(16),
learning_rate: Some(2e-4),
..Default::default()
},
},
CostPerformancePoint {
name: "QLoRA 4-bit r=64".to_string(),
gpu_hours: 20.0,
cost_usd: 20.0 * cost_model.cost_per_hour,
accuracy: 0.86,
loss: 0.35,
memory_gb: 12.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: Some(64),
quant_bits: Some(4),
batch_size: Some(32),
learning_rate: Some(3e-4),
..Default::default()
},
},
CostPerformancePoint {
name: "Distillation T=4".to_string(),
gpu_hours: 36.0,
cost_usd: 36.0 * cost_model.cost_per_hour,
accuracy: 0.84,
loss: 0.38,
memory_gb: 32.0,
is_pareto_optimal: false,
config: ConfigParams {
temperature: Some(4.0),
alpha: Some(0.7),
batch_size: Some(16),
learning_rate: Some(1e-4),
..Default::default()
},
},
CostPerformancePoint {
name: "LoRA + Distillation".to_string(),
gpu_hours: 32.0,
cost_usd: 32.0 * cost_model.cost_per_hour,
accuracy: 0.88,
loss: 0.31,
memory_gb: 26.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: Some(32),
temperature: Some(4.0),
alpha: Some(0.5),
batch_size: Some(16),
learning_rate: Some(2e-4),
..Default::default()
},
},
CostPerformancePoint {
name: "QLoRA 8-bit r=32".to_string(),
gpu_hours: 16.0,
cost_usd: 16.0 * cost_model.cost_per_hour,
accuracy: 0.85,
loss: 0.36,
memory_gb: 16.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: Some(32),
quant_bits: Some(8),
batch_size: Some(32),
learning_rate: Some(2e-4),
..Default::default()
},
},
CostPerformancePoint {
name: "LoRA r=8".to_string(),
gpu_hours: 8.0,
cost_usd: 8.0 * cost_model.cost_per_hour,
accuracy: 0.81,
loss: 0.42,
memory_gb: 18.0,
is_pareto_optimal: false,
config: ConfigParams {
lora_rank: Some(8),
quant_bits: Some(16),
batch_size: Some(32),
learning_rate: Some(5e-4),
..Default::default()
},
},
]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a test point with default hyperparameters and the Pareto flag cleared.
    fn fixture(
        name: &str,
        gpu_hours: f64,
        cost_usd: f64,
        accuracy: f64,
        loss: f64,
        memory_gb: f64,
    ) -> CostPerformancePoint {
        CostPerformancePoint {
            name: name.to_string(),
            gpu_hours,
            cost_usd,
            accuracy,
            loss,
            memory_gb,
            is_pareto_optimal: false,
            config: Default::default(),
        }
    }

    #[test]
    fn test_pareto_frontier() {
        // C costs more than B yet is less accurate, so B dominates it.
        let points = vec![
            fixture("A", 10.0, 10.0, 0.8, 0.3, 16.0),
            fixture("B", 20.0, 20.0, 0.9, 0.2, 24.0),
            fixture("C", 25.0, 25.0, 0.85, 0.25, 24.0),
        ];

        let frontier = compute_pareto_frontier(&points);
        let on_frontier = |name: &str| frontier.iter().any(|p| p.name == name);

        assert_eq!(frontier.len(), 2);
        assert!(on_frontier("A"));
        assert!(on_frontier("B"));
        assert!(!on_frontier("C"));
    }

    #[test]
    fn test_constraints() {
        let constraints = Constraints::new()
            .with_max_cost(50.0)
            .with_min_accuracy(0.85);

        let point_good = fixture("Good", 20.0, 40.0, 0.90, 0.25, 16.0);
        let point_expensive = fixture("Expensive", 30.0, 60.0, 0.95, 0.20, 16.0);
        let point_low_acc = fixture("LowAcc", 10.0, 20.0, 0.80, 0.35, 16.0);

        assert!(constraints.is_satisfied(&point_good));
        // Over the cost cap.
        assert!(!constraints.is_satisfied(&point_expensive));
        // Under the accuracy floor.
        assert!(!constraints.is_satisfied(&point_low_acc));
    }

    #[test]
    fn test_analysis_recommendations() {
        let cost_model = CostModel::a100_80gb();
        let analysis = CostPerformanceAnalysis::from_points(generate_sample_points(&cost_model));

        assert!(!analysis.pareto_frontier.is_empty());
        assert!(analysis.best_accuracy.is_some());
        assert!(analysis.best_efficiency.is_some());

        let constraints = Constraints::new().with_max_cost(50.0);
        let recommendations = analysis.recommend(&constraints);
        assert!(!recommendations.is_empty());
    }

    #[test]
    fn test_cost_models() {
        let a100 = CostModel::a100_80gb();
        let v100 = CostModel::v100();

        assert_eq!(a100.gpu_type, "A100-80GB");
        assert!(a100.cost_per_hour > 0.0);
        assert!(v100.cost_per_hour < a100.cost_per_hour);
    }
}