pub struct WorkerSpec { // 21 fields
pub url: String,
pub models: WorkerModels,
pub worker_type: WorkerType,
pub connection_mode: ConnectionMode,
pub runtime_type: RuntimeType,
pub provider: Option<ProviderType>,
pub labels: HashMap<String, String>,
pub priority: u32,
pub cost: f32,
pub api_key: Option<String>,
pub bootstrap_port: Option<u16>,
pub bootstrap_host: String,
pub dp_base_url: Option<String>,
pub dp_rank: Option<usize>,
pub dp_size: Option<usize>,
pub kv_connector: Option<String>,
pub kv_role: Option<String>,
pub kv_block_size: Option<usize>,
pub health: HealthCheckUpdate,
pub max_connection_attempts: u32,
pub load_monitor_interval_secs: Option<u64>,
}
Core worker identity and configuration.
The single canonical representation of “what is a worker”. Used as the
shared sub-struct across API requests, API responses, and internal runtime
state via #[serde(flatten)].
Fields use #[serde(default)] so the same struct works for both input
(partial config from user) and output (fully resolved state).
Fields§
url: String — Worker URL.
models: WorkerModels — Models this worker can serve.
worker_type: WorkerType — Worker type: regular, prefill, or decode.
connection_mode: ConnectionMode — Connection mode: http or grpc.
runtime_type: RuntimeType — Runtime type: sglang, vllm, trtllm, or external.
provider: Option<ProviderType> — External provider for API transformations.
None means native/passthrough.
labels: HashMap<String, String> — Additional labels/tags.
priority: u32 — Worker priority (higher = preferred).
cost: f32 — Worker cost factor (baseline = 1.0).
api_key: Option<String> — Worker API key. Accepted on input, never included in responses.
bootstrap_port: Option<u16> — Bootstrap port for prefill workers in PD disaggregated mode.
bootstrap_host: String — Bootstrap hostname (derived from URL at construction time).
dp_base_url: Option<String> — Base URL without DP rank suffix (for DP-aware workers).
When set, url contains the rank-suffixed form ({base}@{rank}).
dp_rank: Option<usize> — Data-parallel rank (None = not DP-aware).
dp_size: Option<usize> — Total data-parallel group size (None = not DP-aware).
kv_connector: Option<String> — KV connector type (e.g. “MooncakeConnector”, “NixlConnector”).
kv_role: Option<String> — KV role (e.g. “kv_producer”, “kv_consumer”, “kv_both”).
kv_block_size: Option<usize> — KV cache block size (tokens per block) for event-driven routing. When set, overrides the router-level default for this worker’s model. Typically matches the backend engine’s page size (e.g. 16 for SGLang).
health: HealthCheckUpdate — Per-worker health check overrides (partial — only Some fields override router defaults).
max_connection_attempts: u32 — Maximum connection attempts during worker registration (default: 20).
load_monitor_interval_secs: Option<u64> — Per-worker load monitor interval override (seconds).
When set, workers in the same group use this interval for load polling.
Falls back to the global load_monitor_interval_secs from router config.
Implementations§
Trait Implementations§
Source§impl Clone for WorkerSpec
impl Clone for WorkerSpec
Source§fn clone(&self) -> WorkerSpec
fn clone(&self) -> WorkerSpec
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for WorkerSpec
impl Debug for WorkerSpec
Source§impl<'de> Deserialize<'de> for WorkerSpec
impl<'de> Deserialize<'de> for WorkerSpec
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
Source§impl JsonSchema for WorkerSpec
impl JsonSchema for WorkerSpec
Source§fn schema_name() -> String
fn schema_name() -> String
Source§fn schema_id() -> Cow<'static, str>
fn schema_id() -> Cow<'static, str>
Source§fn json_schema(generator: &mut SchemaGenerator) -> Schema
fn json_schema(generator: &mut SchemaGenerator) -> Schema
Source§fn is_referenceable() -> bool
fn is_referenceable() -> bool
Whether JSON Schemas generated for this type should be re-used where possible using the $ref keyword. Read more