use crate::resources::{CapabilityLevel, ResourceRequirement, ResourceType};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

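/// Common interface for describing a workload to the analysis system.
///
/// Implementors report what kind of workload they are, which resources they
/// need, how heavily they are expected to use them, and how they behave at
/// runtime, so the workload can be matched against available hardware.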
pub trait Workload: Send + Sync + std::fmt::Debug {
    /// Returns the category of this workload.
    fn workload_type(&self) -> WorkloadType;

    /// Returns the resources the workload needs in order to run.
    fn resource_requirements(&self) -> Vec<ResourceRequirement>;

    /// Returns the estimated utilization of each resource type while the workload runs.
    fn estimated_utilization(&self) -> HashMap<ResourceType, f64>;

    /// Returns the expected latency, throughput, and scaling behavior.
    fn performance_characteristics(&self) -> PerformanceCharacteristics;

    /// Returns descriptive metadata about the workload.
    fn metadata(&self) -> WorkloadMetadata;

    /// Checks that the workload definition is internally consistent.
    fn validate(&self) -> crate::error::Result<()>;

    /// Clones the workload into a boxed trait object.
    fn clone_workload(&self) -> Box<dyn Workload>;
}

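/// High-level classification of a workload.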
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum WorkloadType {
    AIInference,
    AITraining,
    DataProcessing,
    WebApplication,
    Database,
    ComputeIntensive,
    MemoryIntensive,
    IOIntensive,
    Custom(String),
}

impl std::fmt::Display for WorkloadType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            WorkloadType::AIInference => write!(f, "AI Inference"),
            WorkloadType::AITraining => write!(f, "AI Training"),
            WorkloadType::DataProcessing => write!(f, "Data Processing"),
            WorkloadType::WebApplication => write!(f, "Web Application"),
            WorkloadType::Database => write!(f, "Database"),
            WorkloadType::ComputeIntensive => write!(f, "Compute Intensive"),
            WorkloadType::MemoryIntensive => write!(f, "Memory Intensive"),
            WorkloadType::IOIntensive => write!(f, "I/O Intensive"),
            WorkloadType::Custom(name) => write!(f, "Custom: {}", name),
        }
    }
}

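/// Expected runtime behavior of a workload: latency, throughput, scalability,
/// and resource-usage patterns.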
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceCharacteristics {
    pub latency_profile: LatencyProfile,
    pub throughput_profile: ThroughputProfile,
    pub scalability: ScalabilityProfile,
    pub resource_patterns: ResourcePatterns,
}

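/// Latency expectations for a workload, in milliseconds.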
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyProfile {
    pub expected_latency_ms: f64,
    pub acceptable_latency_ms: f64,
    pub sensitivity: LatencySensitivity,
}

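/// How strongly a workload is affected by added latency.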
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum LatencySensitivity {
    VeryLow,
    Low,
    Medium,
    High,
    VeryHigh,
}

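/// Throughput expectations for a workload, in operations per second.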
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThroughputProfile {
    pub expected_ops_per_sec: f64,
    pub minimum_ops_per_sec: f64,
    pub peak_ops_per_sec: f64,
    pub consistency_requirement: ThroughputConsistency,
}

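/// How consistent the delivered throughput must be.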
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ThroughputConsistency {
    Variable,
    Consistent,
    Guaranteed,
}

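/// How well a workload scales horizontally and vertically.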
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityProfile {
    pub horizontal_scaling: ScalingCapability,
    pub vertical_scaling: ScalingCapability,
    pub scaling_efficiency: f64,
}

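/// Qualitative rating of a scaling capability.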
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ScalingCapability {
    None,
    Limited,
    Good,
    Excellent,
}

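/// Usage patterns for each major resource class.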
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourcePatterns {
    pub cpu_pattern: UsagePattern,
    pub memory_pattern: UsagePattern,
    pub io_pattern: UsagePattern,
    pub network_pattern: UsagePattern,
}

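/// Shape of a resource's usage over time.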
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum UsagePattern {
    Constant,
    Bursty,
    Periodic,
    Ramping,
    Unpredictable,
}

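/// Descriptive metadata attached to a workload.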
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkloadMetadata {
    pub name: String,
    pub description: String,
    pub version: String,
    pub tags: Vec<String>,
    pub vendor: Option<String>,
    pub license: Option<String>,
    pub documentation_url: Option<String>,
}

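/// AI/ML model inference workload, described by its model parameters and
/// inference configuration.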
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AIInferenceWorkload {
    pub model_params: ModelParameters,
    pub inference_config: InferenceConfig,
    pub metadata: WorkloadMetadata,
}

impl AIInferenceWorkload {
    /// Creates an inference workload for the given model with default
    /// inference settings and generic metadata.
    pub fn new(model_params: ModelParameters) -> Self {
        Self {
            model_params,
            inference_config: InferenceConfig::default(),
            metadata: WorkloadMetadata {
                name: "AI Inference".to_string(),
                description: "AI/ML model inference workload".to_string(),
                version: "1.0.0".to_string(),
                tags: vec!["ai".to_string(), "inference".to_string(), "ml".to_string()],
                vendor: None,
                license: None,
                documentation_url: None,
            },
        }
    }

    /// Replaces the inference configuration.
    pub fn with_config(mut self, config: InferenceConfig) -> Self {
        self.inference_config = config;
        self
    }

    /// Replaces the workload metadata.
    pub fn with_metadata(mut self, metadata: WorkloadMetadata) -> Self {
        self.metadata = metadata;
        self
    }
}

impl Workload for AIInferenceWorkload {
    fn workload_type(&self) -> WorkloadType {
        WorkloadType::AIInference
    }

    fn resource_requirements(&self) -> Vec<ResourceRequirement> {
        let mut requirements = Vec::new();

        // Memory: enough to hold the model, with 50% headroom recommended.
        let memory_gb = self.model_params.memory_required;
        requirements.push(
            ResourceRequirement::new(ResourceType::Memory)
                .minimum_gb(memory_gb)
                .recommended_gb(memory_gb * 1.5)
                .critical(),
        );

        // GPU: required capability scales with parameter count when GPU execution is preferred.
        if self.model_params.prefer_gpu {
            let gpu_level = match self.model_params.parameters {
                params if params >= 70_000_000_000 => CapabilityLevel::Exceptional,
                params if params >= 13_000_000_000 => CapabilityLevel::VeryHigh,
                params if params >= 7_000_000_000 => CapabilityLevel::High,
                params if params >= 1_000_000_000 => CapabilityLevel::Medium,
                _ => CapabilityLevel::Low,
            };

            requirements.push(
                ResourceRequirement::new(ResourceType::GPU)
                    .minimum_level(gpu_level)
                    .preferred_vendor(Some("NVIDIA")),
            );
        }

        // CPU: capability derived from the 0-10 compute score.
        let cpu_level = match self.model_params.compute_required {
            compute if compute >= 8.0 => CapabilityLevel::VeryHigh,
            compute if compute >= 6.0 => CapabilityLevel::High,
            compute if compute >= 4.0 => CapabilityLevel::Medium,
            compute if compute >= 2.0 => CapabilityLevel::Low,
            _ => CapabilityLevel::VeryLow,
        };

        requirements.push(
            ResourceRequirement::new(ResourceType::CPU)
                .minimum_level(cpu_level),
        );

        // Storage: roughly 4 bytes per parameter (FP32 weights) plus 10 GB of headroom.
        let storage_gb = (self.model_params.parameters as f64 * 4.0 / 1_000_000_000.0) + 10.0;
        requirements.push(
            ResourceRequirement::new(ResourceType::Storage)
                .minimum_gb(storage_gb)
                .recommended_gb(storage_gb * 2.0),
        );

        requirements
    }

    fn estimated_utilization(&self) -> HashMap<ResourceType, f64> {
        let mut utilization = HashMap::new();

        let cpu_util = if self.model_params.prefer_gpu { 20.0 } else { 80.0 };
        utilization.insert(ResourceType::CPU, cpu_util);

        if self.model_params.prefer_gpu {
            utilization.insert(ResourceType::GPU, 75.0);
        }

        utilization.insert(ResourceType::Memory, 60.0);
        utilization.insert(ResourceType::Storage, 15.0);
        utilization.insert(ResourceType::Network, 25.0);

        utilization
    }

    fn performance_characteristics(&self) -> PerformanceCharacteristics {
        let latency_ms = if self.model_params.prefer_gpu { 50.0 } else { 200.0 };
        let throughput = if self.model_params.prefer_gpu { 20.0 } else { 5.0 };

        PerformanceCharacteristics {
            latency_profile: LatencyProfile {
                expected_latency_ms: latency_ms,
                acceptable_latency_ms: latency_ms * 2.0,
                sensitivity: LatencySensitivity::High,
            },
            throughput_profile: ThroughputProfile {
                expected_ops_per_sec: throughput,
                minimum_ops_per_sec: throughput * 0.5,
                peak_ops_per_sec: throughput * 1.5,
                consistency_requirement: ThroughputConsistency::Consistent,
            },
            scalability: ScalabilityProfile {
                horizontal_scaling: ScalingCapability::Good,
                vertical_scaling: ScalingCapability::Excellent,
                scaling_efficiency: 0.8,
            },
            resource_patterns: ResourcePatterns {
                cpu_pattern: if self.model_params.prefer_gpu { UsagePattern::Bursty } else { UsagePattern::Constant },
                memory_pattern: UsagePattern::Constant,
                io_pattern: UsagePattern::Bursty,
                network_pattern: UsagePattern::Bursty,
            },
        }
    }

    fn metadata(&self) -> WorkloadMetadata {
        self.metadata.clone()
    }

    fn validate(&self) -> crate::error::Result<()> {
        if self.model_params.parameters == 0 {
            return Err(crate::error::SystemAnalysisError::invalid_workload(
                "Model parameters cannot be zero",
            ));
        }

        if self.model_params.memory_required <= 0.0 {
            return Err(crate::error::SystemAnalysisError::invalid_workload(
                "Memory requirement must be positive",
            ));
        }

        if self.model_params.compute_required <= 0.0 {
            return Err(crate::error::SystemAnalysisError::invalid_workload(
                "Compute requirement must be positive",
            ));
        }

        Ok(())
    }

    fn clone_workload(&self) -> Box<dyn Workload> {
        Box::new(self.clone())
    }
}

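/// Size and resource profile of the model behind an AI workload.
///
/// The builder methods derive rough memory and compute estimates from the
/// parameter count; see the `parameters` method.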
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelParameters {
    pub parameters: u64,
    pub memory_required: f64,
    pub compute_required: f64,
    pub prefer_gpu: bool,
    pub architecture: Option<String>,
    pub quantization: QuantizationLevel,
    pub context_length: Option<u32>,
    pub batch_size: u32,
}

impl ModelParameters {
    /// Creates an empty parameter set; use the builder methods to populate it.
    pub fn new() -> Self {
        Self {
            parameters: 0,
            memory_required: 0.0,
            compute_required: 0.0,
            prefer_gpu: false,
            architecture: None,
            quantization: QuantizationLevel::None,
            context_length: None,
            batch_size: 1,
        }
    }

    /// Sets the parameter count and derives default memory and compute
    /// estimates from it (~4 bytes per parameter plus 20% overhead; compute
    /// scaled to the 1-10 range).
    pub fn parameters(mut self, params: u64) -> Self {
        self.parameters = params;
        self.memory_required = (params as f64 * 4.0 / 1_000_000_000.0) * 1.2;
        self.compute_required = (params as f64 / 1_000_000_000.0).clamp(1.0, 10.0);
        self
    }

    /// Overrides the estimated memory requirement, in gigabytes.
    pub fn memory_required(mut self, gb: f64) -> Self {
        self.memory_required = gb;
        self
    }

    /// Sets the compute requirement on a 0-10 scale (values outside the range are clamped).
    pub fn compute_required(mut self, compute: f64) -> Self {
        self.compute_required = compute.clamp(0.0, 10.0);
        self
    }

    /// Marks whether the workload prefers to run on a GPU.
    pub fn prefer_gpu(mut self, prefer: bool) -> Self {
        self.prefer_gpu = prefer;
        self
    }

    /// Records the model architecture (e.g. "transformer").
    pub fn architecture(mut self, arch: impl Into<String>) -> Self {
        self.architecture = Some(arch.into());
        self
    }

    /// Sets the quantization level and scales the memory estimate accordingly
    /// (Int8 and Custom halve it, Int4 quarters it).
    pub fn quantization(mut self, quant: QuantizationLevel) -> Self {
        self.quantization = quant;
        match quant {
            QuantizationLevel::None => {}
            QuantizationLevel::Int8 => self.memory_required *= 0.5,
            QuantizationLevel::Int4 => self.memory_required *= 0.25,
            QuantizationLevel::Custom(_) => self.memory_required *= 0.5,
        }
        self
    }

    /// Sets the maximum context length, in tokens.
    pub fn context_length(mut self, length: u32) -> Self {
        self.context_length = Some(length);
        self
    }

    /// Sets the processing batch size.
    pub fn batch_size(mut self, size: u32) -> Self {
        self.batch_size = size;
        self
    }
}

impl Default for ModelParameters {
    fn default() -> Self {
        Self::new()
    }
}

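/// Quantization applied to model weights, which reduces the memory estimate.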
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum QuantizationLevel {
    None,
    Int8,
    Int4,
    Custom(u8),
}

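/// Runtime configuration for serving inference requests.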
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceConfig {
    pub max_concurrent_requests: u32,
    pub request_timeout_sec: u32,
    pub enable_batching: bool,
    pub dynamic_batching: Option<DynamicBatchingConfig>,
    pub caching: CachingConfig,
}

impl Default for InferenceConfig {
    fn default() -> Self {
        Self {
            max_concurrent_requests: 10,
            request_timeout_sec: 30,
            enable_batching: true,
            dynamic_batching: Some(DynamicBatchingConfig::default()),
            caching: CachingConfig::default(),
        }
    }
}

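/// Settings for dynamically batching incoming requests.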
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DynamicBatchingConfig {
    pub max_batch_size: u32,
    pub max_wait_time_ms: u32,
    pub preferred_batch_sizes: Vec<u32>,
}

impl Default for DynamicBatchingConfig {
    fn default() -> Self {
        Self {
            max_batch_size: 8,
            max_wait_time_ms: 100,
            preferred_batch_sizes: vec![1, 2, 4, 8],
        }
    }
}

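/// Cache configuration for inference (e.g. the KV cache).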
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachingConfig {
    pub enable_kv_cache: bool,
    pub cache_size_mb: u32,
    pub eviction_policy: CacheEvictionPolicy,
}

impl Default for CachingConfig {
    fn default() -> Self {
        Self {
            enable_kv_cache: true,
            cache_size_mb: 1024,
            eviction_policy: CacheEvictionPolicy::LRU,
        }
    }
}

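/// Policy used to evict entries when the cache is full.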
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum CacheEvictionPolicy {
    LRU,
    LFU,
    FIFO,
    Random,
}

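/// AI/ML model training workload, described by its model parameters and
/// training configuration.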
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AITrainingWorkload {
    pub model_params: ModelParameters,
    pub training_config: TrainingConfig,
    pub metadata: WorkloadMetadata,
}

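/// Basic training hyperparameters.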
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingConfig {
    pub batch_size: u32,
    pub epochs: u32,
    pub learning_rate: f64,
    pub distributed: bool,
}

impl Default for TrainingConfig {
    fn default() -> Self {
        Self {
            batch_size: 32,
            epochs: 10,
            learning_rate: 0.001,
            distributed: false,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_model_parameters_creation() {
        let params = ModelParameters::new()
            .parameters(1_000_000_000)
            .quantization(QuantizationLevel::Int8)
            .context_length(2048)
            .batch_size(4);

        assert_eq!(params.parameters, 1_000_000_000);
        assert_eq!(params.quantization, QuantizationLevel::Int8);
        assert_eq!(params.context_length, Some(2048));
        assert_eq!(params.batch_size, 4);
    }

    #[test]
    fn test_ai_inference_workload_creation() {
        let model_params = ModelParameters::new()
            .parameters(7_000_000_000)
            .context_length(4096);

        let workload = AIInferenceWorkload::new(model_params);
        let requirements = workload.resource_requirements();

        assert!(!requirements.is_empty());
        assert_eq!(workload.workload_type(), WorkloadType::AIInference);
    }

    #[test]
    fn test_workload_type_display() {
        assert_eq!(format!("{}", WorkloadType::AIInference), "AI Inference");
        assert_eq!(format!("{}", WorkloadType::AITraining), "AI Training");
        assert_eq!(format!("{}", WorkloadType::DataProcessing), "Data Processing");
        assert_eq!(format!("{}", WorkloadType::WebApplication), "Web Application");
        assert_eq!(format!("{}", WorkloadType::Custom("Custom Task".to_string())), "Custom: Custom Task");
    }

    #[test]
    fn test_quantization_level() {
        // Distinct quantization levels should not compare equal.
        assert_ne!(QuantizationLevel::None, QuantizationLevel::Int8);
        assert_ne!(QuantizationLevel::Int8, QuantizationLevel::Int4);
        assert_eq!(QuantizationLevel::Custom(16), QuantizationLevel::Custom(16));
    }

    #[test]
    fn test_workload_validation() {
        let model_params = ModelParameters::new()
            .parameters(1_000_000_000);

        let workload = AIInferenceWorkload::new(model_params);

        assert!(workload.validate().is_ok());
    }

    #[test]
    fn test_model_parameters_builder() {
        let params = ModelParameters::new()
            .parameters(1_000_000_000)
            .memory_required(8.0)
            .compute_required(7.5)
            .prefer_gpu(true)
            .architecture("transformer")
            .quantization(QuantizationLevel::Int8)
            .context_length(2048)
            .batch_size(4);

        assert_eq!(params.parameters, 1_000_000_000);
        assert_eq!(params.compute_required, 7.5);
        assert!(params.prefer_gpu);
        assert_eq!(params.architecture, Some("transformer".to_string()));
        assert_eq!(params.context_length, Some(2048));
        assert_eq!(params.batch_size, 4);
    }
}