system_analysis/workloads.rs

//! Workload definitions and modeling.

use crate::resources::{ResourceRequirement, ResourceType, CapabilityLevel};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Trait for workload implementations
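///
/// A minimal consumption sketch using dynamic dispatch (the `plan_for`
/// function is hypothetical, not part of this module):
///
/// ```ignore
/// fn plan_for(workload: &dyn Workload) -> crate::error::Result<()> {
///     workload.validate()?;
///     for requirement in workload.resource_requirements() {
///         // ...match each requirement against the host's resources
///     }
///     Ok(())
/// }
/// ```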
pub trait Workload: Send + Sync + std::fmt::Debug {
    /// Get the workload type
    fn workload_type(&self) -> WorkloadType;

    /// Get resource requirements for this workload
    fn resource_requirements(&self) -> Vec<ResourceRequirement>;

    /// Get estimated resource utilization
    fn estimated_utilization(&self) -> HashMap<ResourceType, f64>;

    /// Get performance characteristics
    fn performance_characteristics(&self) -> PerformanceCharacteristics;

    /// Get workload metadata
    fn metadata(&self) -> WorkloadMetadata;

    /// Validate workload configuration
    fn validate(&self) -> crate::error::Result<()>;

    /// Clone the workload (for trait objects)
    fn clone_workload(&self) -> Box<dyn Workload>;
}

/// Types of workloads
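///
/// `Custom` carries a free-form name; every variant renders through the
/// `Display` impl below, e.g.:
///
/// ```ignore
/// assert_eq!(WorkloadType::Custom("ETL".into()).to_string(), "Custom: ETL");
/// ```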
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum WorkloadType {
    /// AI/ML inference workload
    AIInference,
    /// AI/ML training workload
    AITraining,
    /// Data processing workload
    DataProcessing,
    /// Web application workload
    WebApplication,
    /// Database workload
    Database,
    /// Compute-intensive workload
    ComputeIntensive,
    /// Memory-intensive workload
    MemoryIntensive,
    /// I/O-intensive workload
    IOIntensive,
    /// Custom workload type
    Custom(String),
}

impl std::fmt::Display for WorkloadType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            WorkloadType::AIInference => write!(f, "AI Inference"),
            WorkloadType::AITraining => write!(f, "AI Training"),
            WorkloadType::DataProcessing => write!(f, "Data Processing"),
            WorkloadType::WebApplication => write!(f, "Web Application"),
            WorkloadType::Database => write!(f, "Database"),
            WorkloadType::ComputeIntensive => write!(f, "Compute Intensive"),
            WorkloadType::MemoryIntensive => write!(f, "Memory Intensive"),
            WorkloadType::IOIntensive => write!(f, "I/O Intensive"),
            WorkloadType::Custom(name) => write!(f, "Custom: {}", name),
        }
    }
}

/// Performance characteristics of a workload
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceCharacteristics {
    /// Expected latency profile
    pub latency_profile: LatencyProfile,
    /// Throughput requirements
    pub throughput_profile: ThroughputProfile,
    /// Scalability characteristics
    pub scalability: ScalabilityProfile,
    /// Resource usage patterns
    pub resource_patterns: ResourcePatterns,
}

/// Latency characteristics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyProfile {
    /// Expected latency in milliseconds
    pub expected_latency_ms: f64,
    /// Acceptable latency in milliseconds
    pub acceptable_latency_ms: f64,
    /// Latency sensitivity
    pub sensitivity: LatencySensitivity,
}

/// Latency sensitivity levels
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum LatencySensitivity {
    /// Very low latency sensitivity
    VeryLow,
    /// Low latency sensitivity
    Low,
    /// Medium latency sensitivity
    Medium,
    /// High latency sensitivity
    High,
    /// Very high latency sensitivity (real-time)
    VeryHigh,
}

/// Throughput characteristics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThroughputProfile {
    /// Expected throughput (operations per second)
    pub expected_ops_per_sec: f64,
    /// Minimum acceptable throughput
    pub minimum_ops_per_sec: f64,
    /// Peak throughput requirement
    pub peak_ops_per_sec: f64,
    /// Throughput consistency requirement
    pub consistency_requirement: ThroughputConsistency,
}

/// Throughput consistency requirements
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ThroughputConsistency {
    /// Variable throughput acceptable
    Variable,
    /// Consistent throughput preferred
    Consistent,
    /// Guaranteed throughput required
    Guaranteed,
}

/// Scalability characteristics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityProfile {
    /// Horizontal scaling capability
    pub horizontal_scaling: ScalingCapability,
    /// Vertical scaling capability
    pub vertical_scaling: ScalingCapability,
    /// Scaling efficiency
    pub scaling_efficiency: f64,
}

/// Scaling capability levels
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ScalingCapability {
    /// No scaling support
    None,
    /// Limited scaling support
    Limited,
    /// Good scaling support
    Good,
    /// Excellent scaling support
    Excellent,
}

/// Resource usage patterns
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourcePatterns {
    /// CPU usage pattern
    pub cpu_pattern: UsagePattern,
    /// Memory usage pattern
    pub memory_pattern: UsagePattern,
    /// I/O usage pattern
    pub io_pattern: UsagePattern,
    /// Network usage pattern
    pub network_pattern: UsagePattern,
}

/// Usage pattern types
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum UsagePattern {
    /// Constant usage
    Constant,
    /// Bursty usage
    Bursty,
    /// Periodic usage
    Periodic,
    /// Ramping usage
    Ramping,
    /// Unpredictable usage
    Unpredictable,
}

/// Workload metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkloadMetadata {
    /// Workload name
    pub name: String,
    /// Description
    pub description: String,
    /// Version
    pub version: String,
    /// Tags for categorization
    pub tags: Vec<String>,
    /// Vendor/source information
    pub vendor: Option<String>,
    /// License information
    pub license: Option<String>,
    /// Documentation URL
    pub documentation_url: Option<String>,
}

/// AI inference workload implementation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AIInferenceWorkload {
    /// Model parameters
    pub model_params: ModelParameters,
    /// Inference configuration
    pub inference_config: InferenceConfig,
    /// Metadata
    pub metadata: WorkloadMetadata,
}

impl AIInferenceWorkload {
    /// Create a new AI inference workload
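    ///
    /// A usage sketch (all types are from this module):
    ///
    /// ```ignore
    /// let params = ModelParameters::new()
    ///     .parameters(7_000_000_000)
    ///     .prefer_gpu(true);
    /// let workload = AIInferenceWorkload::new(params)
    ///     .with_config(InferenceConfig::default());
    /// assert_eq!(workload.workload_type(), WorkloadType::AIInference);
    /// ```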
    pub fn new(model_params: ModelParameters) -> Self {
        Self {
            model_params,
            inference_config: InferenceConfig::default(),
            metadata: WorkloadMetadata {
                name: "AI Inference".to_string(),
                description: "AI/ML model inference workload".to_string(),
                version: "1.0.0".to_string(),
                tags: vec!["ai".to_string(), "inference".to_string(), "ml".to_string()],
                vendor: None,
                license: None,
                documentation_url: None,
            },
        }
    }

    /// Set inference configuration
    pub fn with_config(mut self, config: InferenceConfig) -> Self {
        self.inference_config = config;
        self
    }

    /// Set metadata
    pub fn with_metadata(mut self, metadata: WorkloadMetadata) -> Self {
        self.metadata = metadata;
        self
    }
}

impl Workload for AIInferenceWorkload {
    fn workload_type(&self) -> WorkloadType {
        WorkloadType::AIInference
    }

    fn resource_requirements(&self) -> Vec<ResourceRequirement> {
        let mut requirements = Vec::new();

        // Memory requirement based on model size
        let memory_gb = self.model_params.memory_required;
        requirements.push(
            ResourceRequirement::new(ResourceType::Memory)
                .minimum_gb(memory_gb)
                .recommended_gb(memory_gb * 1.5)
                .critical()
        );

        // GPU requirement if preferred
        if self.model_params.prefer_gpu {
            let gpu_level = match self.model_params.parameters {
                params if params >= 70_000_000_000 => CapabilityLevel::Exceptional,
                params if params >= 13_000_000_000 => CapabilityLevel::VeryHigh,
                params if params >= 7_000_000_000 => CapabilityLevel::High,
                params if params >= 1_000_000_000 => CapabilityLevel::Medium,
                _ => CapabilityLevel::Low,
            };

            requirements.push(
                ResourceRequirement::new(ResourceType::GPU)
                    .minimum_level(gpu_level)
                    .preferred_vendor(Some("NVIDIA"))
            );
        }

        // CPU requirement
        let cpu_level = match self.model_params.compute_required {
            compute if compute >= 8.0 => CapabilityLevel::VeryHigh,
            compute if compute >= 6.0 => CapabilityLevel::High,
            compute if compute >= 4.0 => CapabilityLevel::Medium,
            compute if compute >= 2.0 => CapabilityLevel::Low,
            _ => CapabilityLevel::VeryLow,
        };

        requirements.push(
            ResourceRequirement::new(ResourceType::CPU)
                .minimum_level(cpu_level)
        );

        // Storage requirement
        let storage_gb = (self.model_params.parameters as f64 * 4.0 / 1_000_000_000.0) + 10.0; // Rough estimate: fp32 weights at 4 bytes/param, plus 10 GB headroom
        requirements.push(
            ResourceRequirement::new(ResourceType::Storage)
                .minimum_gb(storage_gb)
                .recommended_gb(storage_gb * 2.0)
        );

        requirements
    }

    fn estimated_utilization(&self) -> HashMap<ResourceType, f64> {
        let mut utilization = HashMap::new();

        // CPU utilization depends on whether GPU is available
        let cpu_util = if self.model_params.prefer_gpu { 20.0 } else { 80.0 };
        utilization.insert(ResourceType::CPU, cpu_util);

        // GPU utilization if preferred
        if self.model_params.prefer_gpu {
            utilization.insert(ResourceType::GPU, 75.0);
        }

        // Memory utilization
        utilization.insert(ResourceType::Memory, 60.0);

        // Storage utilization (mostly read)
        utilization.insert(ResourceType::Storage, 15.0);

        // Network utilization (if serving inference)
        utilization.insert(ResourceType::Network, 25.0);

        utilization
    }

    fn performance_characteristics(&self) -> PerformanceCharacteristics {
        let latency_ms = if self.model_params.prefer_gpu { 50.0 } else { 200.0 };
        let throughput = if self.model_params.prefer_gpu { 20.0 } else { 5.0 };

        PerformanceCharacteristics {
            latency_profile: LatencyProfile {
                expected_latency_ms: latency_ms,
                acceptable_latency_ms: latency_ms * 2.0,
                sensitivity: LatencySensitivity::High,
            },
            throughput_profile: ThroughputProfile {
                expected_ops_per_sec: throughput,
                minimum_ops_per_sec: throughput * 0.5,
                peak_ops_per_sec: throughput * 1.5,
                consistency_requirement: ThroughputConsistency::Consistent,
            },
            scalability: ScalabilityProfile {
                horizontal_scaling: ScalingCapability::Good,
                vertical_scaling: ScalingCapability::Excellent,
                scaling_efficiency: 0.8,
            },
            resource_patterns: ResourcePatterns {
                cpu_pattern: if self.model_params.prefer_gpu { UsagePattern::Bursty } else { UsagePattern::Constant },
                memory_pattern: UsagePattern::Constant,
                io_pattern: UsagePattern::Bursty,
                network_pattern: UsagePattern::Bursty,
            },
        }
    }

    fn metadata(&self) -> WorkloadMetadata {
        self.metadata.clone()
    }

    fn validate(&self) -> crate::error::Result<()> {
        if self.model_params.parameters == 0 {
            return Err(crate::error::SystemAnalysisError::invalid_workload(
                "Model parameters cannot be zero"
            ));
        }

        if self.model_params.memory_required <= 0.0 {
            return Err(crate::error::SystemAnalysisError::invalid_workload(
                "Memory requirement must be positive"
            ));
        }

        if self.model_params.compute_required <= 0.0 {
            return Err(crate::error::SystemAnalysisError::invalid_workload(
                "Compute requirement must be positive"
            ));
        }

        Ok(())
    }

    fn clone_workload(&self) -> Box<dyn Workload> {
        Box::new(self.clone())
    }
}

/// Model parameters for AI workloads
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelParameters {
    /// Number of parameters in the model
    pub parameters: u64,
    /// Memory required in GB
    pub memory_required: f64,
    /// Compute intensity (0-10 scale)
    pub compute_required: f64,
    /// Prefer GPU acceleration
    pub prefer_gpu: bool,
    /// Model architecture type
    pub architecture: Option<String>,
    /// Quantization level
    pub quantization: QuantizationLevel,
    /// Context length
    pub context_length: Option<u32>,
    /// Batch size
    pub batch_size: u32,
}

impl ModelParameters {
    /// Create new model parameters
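    ///
    /// A builder-chain sketch; note that `parameters` also auto-estimates
    /// `memory_required` and `compute_required`, so the explicit setters are
    /// only needed as overrides:
    ///
    /// ```ignore
    /// let params = ModelParameters::new()
    ///     .parameters(13_000_000_000)
    ///     .prefer_gpu(true)
    ///     .context_length(4096);
    /// assert!(params.memory_required > 0.0);
    /// ```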
    pub fn new() -> Self {
        Self {
            parameters: 0,
            memory_required: 0.0,
            compute_required: 0.0,
            prefer_gpu: false,
            architecture: None,
            quantization: QuantizationLevel::None,
            context_length: None,
            batch_size: 1,
        }
    }

    /// Set number of parameters
    pub fn parameters(mut self, params: u64) -> Self {
        self.parameters = params;
        // Auto-estimate memory requirement: fp32 weights at 4 bytes per parameter, plus 20% overhead
        self.memory_required = (params as f64 * 4.0 / 1_000_000_000.0) * 1.2;
        // Auto-estimate compute intensity from model size, clamped to the 1-10 scale
        self.compute_required = (params as f64 / 1_000_000_000.0).clamp(1.0, 10.0);
        self
    }

    /// Set memory requirement in GB
    pub fn memory_required(mut self, gb: f64) -> Self {
        self.memory_required = gb;
        self
    }

    /// Set compute requirement (0-10 scale)
    pub fn compute_required(mut self, compute: f64) -> Self {
        self.compute_required = compute.clamp(0.0, 10.0);
        self
    }

    /// Set GPU preference
    pub fn prefer_gpu(mut self, prefer: bool) -> Self {
        self.prefer_gpu = prefer;
        self
    }

    /// Set model architecture
    pub fn architecture(mut self, arch: impl Into<String>) -> Self {
        self.architecture = Some(arch.into());
        self
    }

    /// Set quantization level
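    ///
    /// Also scales the current memory estimate (Int8 and Custom halve it;
    /// Int4 quarters it), so call this after `parameters`. Worked example
    /// under this model: 7B parameters auto-estimate to 7 * 4 * 1.2 = 33.6 GB
    /// at full precision, and Int8 halves that to 16.8 GB:
    ///
    /// ```ignore
    /// let params = ModelParameters::new()
    ///     .parameters(7_000_000_000)              // memory_required: about 33.6 GB
    ///     .quantization(QuantizationLevel::Int8); // memory_required: about 16.8 GB
    /// ```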
    pub fn quantization(mut self, quant: QuantizationLevel) -> Self {
        self.quantization = quant;
        // Adjust memory requirement based on quantization
        match quant {
            QuantizationLevel::None => {},
            QuantizationLevel::Int8 => self.memory_required *= 0.5,
            QuantizationLevel::Int4 => self.memory_required *= 0.25,
            QuantizationLevel::Custom(_) => self.memory_required *= 0.5,
        }
        self
    }

    /// Set context length
    pub fn context_length(mut self, length: u32) -> Self {
        self.context_length = Some(length);
        self
    }

    /// Set batch size
    pub fn batch_size(mut self, size: u32) -> Self {
        self.batch_size = size;
        self
    }
}

impl Default for ModelParameters {
    fn default() -> Self {
        Self::new()
    }
}

/// Quantization levels for models
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum QuantizationLevel {
    /// No quantization (full precision)
    None,
    /// 8-bit integer quantization
    Int8,
    /// 4-bit integer quantization
    Int4,
    /// Custom quantization
    Custom(u8),
}

/// Inference configuration
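///
/// A sketch of overriding selected fields while keeping the rest at their
/// defaults via struct update syntax:
///
/// ```ignore
/// let config = InferenceConfig {
///     max_concurrent_requests: 32,
///     request_timeout_sec: 60,
///     ..InferenceConfig::default()
/// };
/// ```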
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceConfig {
    /// Maximum concurrent requests
    pub max_concurrent_requests: u32,
    /// Request timeout in seconds
    pub request_timeout_sec: u32,
    /// Enable batching
    pub enable_batching: bool,
    /// Dynamic batching settings
    pub dynamic_batching: Option<DynamicBatchingConfig>,
    /// Caching configuration
    pub caching: CachingConfig,
}

impl Default for InferenceConfig {
    fn default() -> Self {
        Self {
            max_concurrent_requests: 10,
            request_timeout_sec: 30,
            enable_batching: true,
            dynamic_batching: Some(DynamicBatchingConfig::default()),
            caching: CachingConfig::default(),
        }
    }
}

/// Dynamic batching configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DynamicBatchingConfig {
    /// Maximum batch size
    pub max_batch_size: u32,
    /// Maximum wait time in milliseconds
    pub max_wait_time_ms: u32,
    /// Preferred batch sizes
    pub preferred_batch_sizes: Vec<u32>,
}

impl Default for DynamicBatchingConfig {
    fn default() -> Self {
        Self {
            max_batch_size: 8,
            max_wait_time_ms: 100,
            preferred_batch_sizes: vec![1, 2, 4, 8],
        }
    }
}

/// Caching configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachingConfig {
    /// Enable KV cache
    pub enable_kv_cache: bool,
    /// Cache size in MB
    pub cache_size_mb: u32,
    /// Cache eviction policy
    pub eviction_policy: CacheEvictionPolicy,
}

impl Default for CachingConfig {
    fn default() -> Self {
        Self {
            enable_kv_cache: true,
            cache_size_mb: 1024,
            eviction_policy: CacheEvictionPolicy::LRU,
        }
    }
}

/// Cache eviction policies
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum CacheEvictionPolicy {
    /// Least Recently Used
    LRU,
    /// Least Frequently Used
    LFU,
    /// First In, First Out
    FIFO,
    /// Random eviction
    Random,
}

/// AI training workload (placeholder for future implementation)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AITrainingWorkload {
    /// Model parameters
    pub model_params: ModelParameters,
    /// Training configuration
    pub training_config: TrainingConfig,
    /// Metadata
    pub metadata: WorkloadMetadata,
}

/// Training configuration (basic structure)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrainingConfig {
    /// Batch size for training
    pub batch_size: u32,
    /// Number of epochs
    pub epochs: u32,
    /// Learning rate
    pub learning_rate: f64,
    /// Distributed training
    pub distributed: bool,
}

impl Default for TrainingConfig {
    fn default() -> Self {
        Self {
            batch_size: 32,
            epochs: 10,
            learning_rate: 0.001,
            distributed: false,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_model_parameters_creation() {
        let params = ModelParameters::new()
            .parameters(1_000_000_000) // 1B parameters
            .quantization(QuantizationLevel::Int8)
            .context_length(2048)
            .batch_size(4);

        assert_eq!(params.parameters, 1_000_000_000);
        assert_eq!(params.quantization, QuantizationLevel::Int8);
        assert_eq!(params.context_length, Some(2048));
        assert_eq!(params.batch_size, 4);
    }

    #[test]
    fn test_ai_inference_workload_creation() {
        let model_params = ModelParameters::new()
            .parameters(7_000_000_000) // 7B model
            .context_length(4096);

        let workload = AIInferenceWorkload::new(model_params);
        let requirements = workload.resource_requirements();

        assert!(!requirements.is_empty());
        assert_eq!(workload.workload_type(), WorkloadType::AIInference);
    }

    #[test]
    fn test_workload_type_display() {
        assert_eq!(format!("{}", WorkloadType::AIInference), "AI Inference");
        assert_eq!(format!("{}", WorkloadType::DataProcessing), "Data Processing");
        assert_eq!(format!("{}", WorkloadType::WebApplication), "Web Application");
        assert_eq!(format!("{}", WorkloadType::Custom("Custom Task".to_string())), "Custom: Custom Task");
    }

    #[test]
    fn test_quantization_level() {
        // Quantization levels derive PartialEq and can be compared directly
        assert_eq!(QuantizationLevel::Int8, QuantizationLevel::Int8);
        assert_ne!(QuantizationLevel::Int8, QuantizationLevel::Int4);
        assert_eq!(QuantizationLevel::Custom(16), QuantizationLevel::Custom(16));
        assert_ne!(QuantizationLevel::Custom(8), QuantizationLevel::Custom(16));
    }

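    #[test]
    fn test_quantization_memory_scaling() {
        // A consistency sketch: relies on the auto-estimate in `parameters`
        // (4 bytes/param * 1.2) and the Int8 multiplier of 0.5 in `quantization`.
        let fp32 = ModelParameters::new().parameters(1_000_000_000);
        let int8 = ModelParameters::new()
            .parameters(1_000_000_000)
            .quantization(QuantizationLevel::Int8);
        assert!((int8.memory_required - fp32.memory_required * 0.5).abs() < 1e-9);
    }
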
    #[test]
    fn test_workload_validation() {
        let model_params = ModelParameters::new()
            .parameters(1_000_000_000);

        let workload = AIInferenceWorkload::new(model_params);

        // Validation should pass for reasonable parameters
        assert!(workload.validate().is_ok());
    }

    #[test]
    fn test_model_parameters_builder() {
        let params = ModelParameters::new()
            .parameters(1_000_000_000)
            .memory_required(8.0)
            .compute_required(7.5)
            .prefer_gpu(true)
            .architecture("transformer")
            .quantization(QuantizationLevel::Int8)
            .context_length(2048)
            .batch_size(4);

        assert_eq!(params.parameters, 1_000_000_000);
        assert_eq!(params.compute_required, 7.5);
        assert!(params.prefer_gpu);
        assert_eq!(params.architecture, Some("transformer".to_string()));
        assert_eq!(params.context_length, Some(2048));
        assert_eq!(params.batch_size, 4);
    }
}