{
"models": {
"llama-7b": {
"model_path": "meta-llama/Llama-2-7b-chat-hf",
"port": 8001,
"eviction": { "weights": "offload", "process": "keep_running" },
"extra_args": ["--gpu-memory-utilization", "0.9"]
},
"mistral-7b": {
"model_path": "mistralai/Mistral-7B-Instruct-v0.2",
"port": 8002,
"eviction": { "weights": "discard", "process": "checkpoint" },
"extra_args": ["--gpu-memory-utilization", "0.9"]
}
},
"checkpoint": {
"criu_path": "criu",
"cuda_plugin_dir": "/usr/lib/criu/",
"images_dir": "/tmp/llmux-checkpoints",
"cuda_checkpoint_path": "cuda-checkpoint",
"keep_images": true
},
"policy": {
"policy_type": "fifo",
"request_timeout_secs": 300,
"drain_before_switch": true,
"eviction": { "weights": "offload", "process": "keep_running" }
},
"port": 3000,
"metrics_port": 9090,
"admin_port": 3001
}