use crate::memory_monitor::MemoryConfig;
use super::super::{models::DEFAULT_CONTEXT_POOL_SIZE, DEFAULT_HTTP_PORT, DEFAULT_SOCKET};
/// Strategy selector stored in the resource configuration's `eviction_policy` field.
///
/// The default variant is [`EvictionPolicy::Lru`].
///
/// NOTE(review): the per-variant descriptions below are inferred from the
/// variant names only; confirm against the code that acts on the policy.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum EvictionPolicy {
    /// Presumably "evict the least-recently-used entry first" — confirm.
    #[default]
    Lru,
    /// Presumably "evict only on explicit request" — confirm.
    Manual,
    /// Presumably "never evict" — confirm.
    None,
}
/// Settings for the HTTP side of the daemon.
///
/// NOTE(review): field meanings are inferred from names and types; how each
/// value is enforced is decided by the code that consumes this struct.
#[derive(Clone, Debug)]
pub struct HttpConfig {
    /// Port for the HTTP listener. The meaning of `None` is up to the
    /// consumer (presumably "HTTP disabled" — confirm).
    pub port: Option<u16>,
    /// Address string for the HTTP listener.
    pub addr: String,
    /// Optional API key.
    pub api_key: Option<String>,
    /// Flag controlling whether the API key is enforced.
    pub enforce_api_key: bool,
    /// Upper bound on a request body, in bytes.
    pub max_request_body_bytes: usize,
    /// Upper bound on concurrently handled requests.
    pub max_concurrent_requests: usize,
    /// Upper bound on requests per second.
    pub max_requests_per_second: u64,
}
impl Default for HttpConfig {
fn default() -> Self {
Self {
port: Some(DEFAULT_HTTP_PORT),
addr: "127.0.0.1".to_string(),
api_key: None,
enforce_api_key: false,
max_request_body_bytes: 2 * 1024 * 1024,
max_concurrent_requests: 64,
max_requests_per_second: 200,
}
}
}
/// Per-model settings used as defaults.
///
/// NOTE(review): field meanings are inferred from names and types; valid
/// ranges and accepted string values are defined by the consumer — confirm
/// there before relying on these descriptions.
#[derive(Clone, Debug)]
pub struct ModelDefaultsConfig {
    /// Context size (presumably in tokens — confirm).
    pub context_size: u32,
    /// Number of layers to place on the GPU; signed, so negative values are
    /// representable (their meaning is up to the consumer).
    pub gpu_layers: i32,
    /// Size of the context pool.
    pub context_pool_size: usize,
    /// Thread count per model.
    pub threads_per_model: i32,
    /// Flash-attention toggle.
    pub flash_attn: bool,
    /// Optional mmap toggle; `None` leaves the choice to the consumer.
    pub use_mmap: Option<bool>,
    /// mlock toggle.
    pub use_mlock: bool,
    /// Optional cache type for K, as a string.
    pub cache_type_k: Option<String>,
    /// Optional cache type for V, as a string.
    pub cache_type_v: Option<String>,
    /// Optional batch size.
    pub n_batch: Option<u32>,
    /// Optional RoPE frequency base.
    pub rope_freq_base: Option<f32>,
    /// Optional RoPE frequency scale.
    pub rope_freq_scale: Option<f32>,
    /// Optional defragmentation threshold.
    pub defrag_thold: Option<f32>,
    /// Optional split mode, as a string.
    pub split_mode: Option<String>,
}
impl Default for ModelDefaultsConfig {
fn default() -> Self {
Self {
context_size: 4096,
gpu_layers: 0,
context_pool_size: DEFAULT_CONTEXT_POOL_SIZE,
threads_per_model: (num_cpus::get() / 2).max(1) as i32,
flash_attn: false,
use_mmap: None,
use_mlock: false,
cache_type_k: None,
cache_type_v: None,
n_batch: None,
rope_freq_base: None,
rope_freq_scale: None,
defrag_thold: None,
split_mode: None,
}
}
}
/// Resource-related limits and memory-management settings.
///
/// NOTE(review): field meanings are inferred from names and types; confirm
/// enforcement details against the code that reads this struct.
#[derive(Clone, Debug)]
pub struct ResourceConfig {
    /// Upper bound on tokens for a single request.
    pub max_tokens_per_request: u32,
    /// Settings handed to the memory monitor.
    pub memory_config: MemoryConfig,
    /// Toggle for memory monitoring.
    pub enable_memory_monitoring: bool,
    /// Optional cap on the number of loaded models; `None` presumably means
    /// "no cap" — confirm.
    pub max_loaded_models: Option<usize>,
    /// Optional memory cap in bytes; `None` presumably means "no cap" —
    /// confirm.
    pub max_memory_bytes: Option<u64>,
    /// Eviction policy selector.
    pub eviction_policy: EvictionPolicy,
    /// Optional idle time, in seconds, tied to unloading; `None` presumably
    /// disables idle unloading — confirm.
    pub idle_unload_secs: Option<u64>,
}
impl Default for ResourceConfig {
fn default() -> Self {
Self {
max_tokens_per_request: 4096,
memory_config: MemoryConfig::default(),
enable_memory_monitoring: true,
max_loaded_models: None,
max_memory_bytes: None,
eviction_policy: EvictionPolicy::default(),
idle_unload_secs: None,
}
}
}
/// Top-level daemon configuration, grouping the IPC address, HTTP settings,
/// model defaults, resource settings and optional TLS file paths.
#[derive(Clone, Debug)]
pub struct DaemonConfig {
    /// Address of the IPC endpoint, as a string.
    pub ipc_addr: String,
    /// HTTP settings.
    pub http: HttpConfig,
    /// Default per-model settings.
    pub model_defaults: ModelDefaultsConfig,
    /// Resource limits and memory-management settings.
    pub resources: ResourceConfig,
    /// Optional path to a TLS certificate.
    /// NOTE(review): which listener uses it is not visible here — confirm.
    pub tls_cert_path: Option<String>,
    /// Optional path to a TLS private key.
    /// NOTE(review): which listener uses it is not visible here — confirm.
    pub tls_key_path: Option<String>,
}
impl Default for DaemonConfig {
fn default() -> Self {
Self {
ipc_addr: DEFAULT_SOCKET.to_string(),
http: HttpConfig::default(),
model_defaults: ModelDefaultsConfig::default(),
resources: ResourceConfig::default(),
tls_cert_path: None,
tls_key_path: None,
}
}
}