1use crate::{DataType, Device, ModelId, ModelInfo, SamplingParams, SamplingPresets};
4use serde::{Deserialize, Serialize};
5use std::{collections::HashMap, time::Duration};
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct EngineConfig {
10 pub model: EngineModelConfig,
11 pub scheduler: SchedulerConfig,
12 pub sampling: SamplingConfig,
13 pub backend: BackendConfig,
14 pub kv_cache: KvCacheConfig,
15 pub memory: MemoryConfig,
16 pub batching: BatchConfig,
17 pub monitoring: MonitoringConfig,
18}
19
20impl Default for EngineConfig {
21 fn default() -> Self {
22 Self {
23 model: EngineModelConfig::default(),
24 scheduler: SchedulerConfig::default(),
25 sampling: SamplingConfig::default(),
26 backend: BackendConfig::default(),
27 kv_cache: KvCacheConfig::default(),
28 memory: MemoryConfig::default(),
29 batching: BatchConfig::default(),
30 monitoring: MonitoringConfig::default(),
31 }
32 }
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct EngineModelConfig {
37 pub model_id: ModelId,
38 pub model_info: Option<ModelInfo>,
39 pub tokenizer: TokenizerConfig,
40}
41
42impl Default for EngineModelConfig {
43 fn default() -> Self {
44 Self {
45 model_id: ModelId::new("default"),
46 model_info: None,
47 tokenizer: TokenizerConfig::default(),
48 }
49 }
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct SchedulerConfig {
55 pub policy: SchedulingPolicy,
57 pub max_waiting_requests: usize,
59 pub max_running_requests: usize,
61 pub enable_preemption: bool,
63 pub enable_load_balancing: bool,
65 pub fair_share_weights: HashMap<String, f32>,
67 pub enable_sla_enforcement: bool,
69}
70
71impl Default for SchedulerConfig {
72 fn default() -> Self {
73 Self {
74 policy: SchedulingPolicy::Priority,
75 max_waiting_requests: 1000,
76 max_running_requests: 256,
77 enable_preemption: true,
78 enable_load_balancing: false,
79 fair_share_weights: HashMap::new(),
80 enable_sla_enforcement: false,
81 }
82 }
83}
84
85#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
87pub enum SchedulingPolicy {
88 FCFS,
90 Priority,
92 FairShare,
94 SJF,
96 RoundRobin,
98 ContinuousBatch,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct KvCacheConfig {
105 pub cache_type: KvCacheType,
107 pub block_size: usize,
109 pub max_blocks: usize,
111 pub enable_compression: bool,
113 pub compression_ratio: f32,
115 pub enable_multi_level: bool,
117 pub swap_threshold: f32,
119 pub enable_prefix_caching: bool,
121 pub prefix_cache_size: usize,
123}
124
125impl Default for KvCacheConfig {
126 fn default() -> Self {
127 Self {
128 cache_type: KvCacheType::Contiguous,
129 block_size: 16,
130 max_blocks: 1024,
131 enable_compression: false,
132 compression_ratio: 0.5,
133 enable_multi_level: true,
134 swap_threshold: 0.8,
135 enable_prefix_caching: true,
136 prefix_cache_size: 100,
137 }
138 }
139}
140
141#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
143pub enum KvCacheType {
144 Contiguous,
146 Paged,
148 Tree,
150}
151
152#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct MemoryConfig {
155 pub pool_size: Option<usize>,
157 pub enable_pooling: bool,
159 pub alignment: usize,
161 pub enable_defragmentation: bool,
163 pub defragmentation_threshold: f32,
165 pub enable_memory_stats: bool,
167 pub pressure_warning_threshold: f32,
169 pub pressure_critical_threshold: f32,
171}
172
173impl Default for MemoryConfig {
174 fn default() -> Self {
175 Self {
176 pool_size: None,
177 enable_pooling: true,
178 alignment: 256,
179 enable_defragmentation: false,
180 defragmentation_threshold: 0.7,
181 enable_memory_stats: true,
182 pressure_warning_threshold: 0.8,
183 pressure_critical_threshold: 0.95,
184 }
185 }
186}
187
188#[derive(Debug, Clone, Serialize, Deserialize)]
190pub struct BackendConfig {
191 pub backend_type: BackendType,
193 pub device: Device,
195 pub dtype: DataType,
197 pub enable_optimizations: bool,
199 pub optimization_level: u8,
201 pub enable_cuda_graphs: bool,
203 pub enable_kernel_fusion: bool,
205 pub backend_options: HashMap<String, serde_json::Value>,
207}
208
209impl Default for BackendConfig {
210 fn default() -> Self {
211 Self {
212 backend_type: BackendType::Candle,
213 device: Device::CPU,
214 dtype: DataType::FP16,
215 enable_optimizations: true,
216 optimization_level: 2,
217 enable_cuda_graphs: false,
218 enable_kernel_fusion: true,
219 backend_options: HashMap::new(),
220 }
221 }
222}
223
224#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
226pub enum BackendType {
227 Candle,
229 OnnxRuntime,
231 TensorRT,
233 Custom,
235}
236
237#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct TokenizerConfig {
240 pub tokenizer_type: TokenizerType,
242 pub tokenizer_path: Option<String>,
244 pub enable_fast: bool,
246 pub add_special_tokens: bool,
248 pub truncation: Option<TruncationConfig>,
250 pub padding: Option<PaddingConfig>,
252}
253
254impl Default for TokenizerConfig {
255 fn default() -> Self {
256 Self {
257 tokenizer_type: TokenizerType::BPE,
258 tokenizer_path: None,
259 enable_fast: true,
260 add_special_tokens: true,
261 truncation: None,
262 padding: None,
263 }
264 }
265}
266
267#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
269pub enum TokenizerType {
270 BPE,
272 WordPiece,
274 SentencePiece,
276 Tiktoken,
278 Custom,
280}
281
282#[derive(Debug, Clone, Serialize, Deserialize)]
284pub struct TruncationConfig {
285 pub max_length: usize,
287 pub strategy: TruncationStrategy,
289}
290
291#[derive(Debug, Clone, Serialize, Deserialize)]
293pub enum TruncationStrategy {
294 TruncateStart,
296 TruncateEnd,
298 TruncateBoth,
300}
301
302#[derive(Debug, Clone, Serialize, Deserialize)]
304pub struct PaddingConfig {
305 pub strategy: PaddingStrategy,
307 pub token_id: u32,
309 pub target_length: Option<usize>,
311}
312
313#[derive(Debug, Clone, Serialize, Deserialize)]
315pub enum PaddingStrategy {
316 None,
318 MaxLength,
320 FixedLength,
322}
323
324#[derive(Debug, Clone, Serialize, Deserialize)]
328pub struct SecurityConfig {
329 pub enable_auth: bool,
331 pub api_keys: Vec<String>,
333 pub enable_rate_limiting: bool,
335 pub rate_limit_rpm: u32,
337 pub enable_content_filter: bool,
339 pub max_prompt_length: usize,
341 pub enable_prompt_validation: bool,
343 pub allowed_extensions: Vec<String>,
345}
346
347impl Default for SecurityConfig {
348 fn default() -> Self {
349 Self {
350 enable_auth: false,
351 api_keys: vec![],
352 enable_rate_limiting: true,
353 rate_limit_rpm: 60,
354 enable_content_filter: false,
355 max_prompt_length: 32768,
356 enable_prompt_validation: true,
357 allowed_extensions: vec!["txt".to_string(), "json".to_string()],
358 }
359 }
360}
361
362#[derive(Debug, Clone, Serialize, Deserialize)]
363pub struct SamplingConfig {
364 pub default_params: SamplingParams,
365 pub presets: SamplingPresets,
366 pub enable_custom_processors: bool,
367}
368
369impl Default for SamplingConfig {
370 fn default() -> Self {
371 Self {
372 default_params: SamplingParams::default(),
373 presets: SamplingPresets::default(),
374 enable_custom_processors: false,
375 }
376 }
377}
378
379#[derive(Debug, Clone, Serialize, Deserialize)]
380pub struct MonitoringConfig {
381 pub enable_metrics: bool,
382 pub enable_tracing: bool,
383 pub export_interval: Duration,
384}
385
386impl Default for MonitoringConfig {
387 fn default() -> Self {
388 Self {
389 enable_metrics: true,
390 enable_tracing: true,
391 export_interval: Duration::from_secs(5),
392 }
393 }
394}
395
396#[derive(Debug, Clone, Serialize, Deserialize)]
397pub struct BatchConfig {
398 pub max_batch_size: usize,
399 pub max_wait_ms: u64,
400 pub enable_dynamic: bool,
401 pub enable_continuous: bool,
402}
403
404impl Default for BatchConfig {
405 fn default() -> Self {
406 Self {
407 max_batch_size: 16,
408 max_wait_ms: 8,
409 enable_dynamic: true,
410 enable_continuous: false,
411 }
412 }
413}