pub struct WorkerConfigRequest { // 20 fields
pub url: String,
pub api_key: Option<String>,
pub model_id: Option<String>,
pub priority: Option<u32>,
pub cost: Option<f32>,
pub worker_type: Option<String>,
pub bootstrap_port: Option<u16>,
pub runtime: Option<String>,
pub tokenizer_path: Option<String>,
pub reasoning_parser: Option<String>,
pub tool_parser: Option<String>,
pub chat_template: Option<String>,
pub labels: HashMap<String, String>,
pub health_check_timeout_secs: u64,
pub health_check_interval_secs: u64,
pub health_success_threshold: u32,
pub health_failure_threshold: u32,
pub disable_health_check: bool,
pub max_connection_attempts: u32,
pub dp_aware: bool,
}

Worker configuration for API requests.
Fields

- `url: String` — Worker URL (required)
- `api_key: Option<String>` — Worker API key (optional)
- `model_id: Option<String>` — Model ID (optional, will query from server if not provided)
- `priority: Option<u32>` — Worker priority (optional, default: 50, higher = preferred)
- `cost: Option<f32>` — Worker cost factor (optional, default: 1.0)
- `worker_type: Option<String>` — Worker type (optional: “regular”, “prefill”, “decode”)
- `bootstrap_port: Option<u16>` — Bootstrap port for prefill workers (optional)
- `runtime: Option<String>` — Runtime type (optional: “sglang”, “vllm”, default: “sglang”). Only relevant for gRPC workers.
- `tokenizer_path: Option<String>` — Tokenizer path for gRPC mode
- `reasoning_parser: Option<String>` — Reasoning parser type for gRPC mode
- `tool_parser: Option<String>` — Tool parser type for gRPC mode
- `chat_template: Option<String>` — Chat template for gRPC mode
- `labels: HashMap<String, String>` — Additional labels (optional)
- `health_check_timeout_secs: u64` — Health check timeout in seconds (default: 30)
- `health_check_interval_secs: u64` — Health check interval in seconds (default: 60)
- `health_success_threshold: u32` — Number of successful health checks needed to mark worker as healthy (default: 2)
- `health_failure_threshold: u32` — Number of failed health checks before marking worker as unhealthy (default: 3)
- `disable_health_check: bool` — Disable periodic health checks for this worker (default: false)
- `max_connection_attempts: u32` — Maximum connection attempts during worker registration (default: 20)
- `dp_aware: bool` — Enable data parallelism aware scheduling (default: false)
Trait Implementations

impl Clone for WorkerConfigRequest

- `fn clone(&self) -> WorkerConfigRequest`
- `fn clone_from(&mut self, source: &Self)` (since 1.0.0) — Performs copy-assignment from `source`. Read more in the `Clone::clone_from` documentation.