Struct WorkerSpec

Source

pub struct WorkerSpec {
    pub url: String,
    pub models: WorkerModels,
    pub worker_type: WorkerType,
    pub connection_mode: ConnectionMode,
    pub runtime_type: RuntimeType,
    pub provider: Option<ProviderType>,
    pub labels: HashMap<String, String>,
    pub priority: u32,
    pub cost: f32,
    pub api_key: Option<String>,
    pub bootstrap_port: Option<u16>,
    pub bootstrap_host: String,
    pub dp_base_url: Option<String>,
    pub dp_rank: Option<usize>,
    pub dp_size: Option<usize>,
    pub kv_connector: Option<String>,
    pub kv_role: Option<String>,
    pub kv_block_size: Option<usize>,
    pub health: HealthCheckUpdate,
    pub max_connection_attempts: u32,
    pub load_monitor_interval_secs: Option<u64>,
}

Expand description

Core worker identity and configuration.

The single canonical representation of “what is a worker”. Used as the shared sub-struct across API requests, API responses, and internal runtime state via #[serde(flatten)].

Fields use #[serde(default)] so the same struct works for both input (partial config from user) and output (fully resolved state).

Fields§

§url: String

Worker URL.

§models: WorkerModels

Models this worker can serve.

§worker_type: WorkerType

Worker type: regular, prefill, or decode.

§connection_mode: ConnectionMode

Connection mode: http or grpc.

§runtime_type: RuntimeType

Runtime type: sglang, vllm, trtllm, or external.

§provider: Option<ProviderType>

External provider for API transformations. None means native/passthrough.

§labels: HashMap<String, String>

Additional labels/tags.

§priority: u32

Worker priority (higher = preferred).

§cost: f32

Worker cost factor (baseline = 1.0).

§api_key: Option<String>

Worker API key. Accepted on input, never included in responses.

§bootstrap_port: Option<u16>

Bootstrap port for prefill workers in PD disaggregated mode.

§bootstrap_host: String

Bootstrap hostname (derived from URL at construction time).

§dp_base_url: Option<String>

Base URL without DP rank suffix (for DP-aware workers). When set, url contains the rank-suffixed form ({base}@{rank}).

§dp_rank: Option<usize>

Data-parallel rank (None = not DP-aware).

§dp_size: Option<usize>

Total data-parallel group size (None = not DP-aware).

§kv_connector: Option<String>

KV connector type (e.g. “MooncakeConnector”, “NixlConnector”).

§kv_role: Option<String>

KV role (e.g. “kv_producer”, “kv_consumer”, “kv_both”).

§kv_block_size: Option<usize>

KV cache block size (tokens per block) for event-driven routing. When set, overrides the router-level default for this worker’s model. Typically matches the backend engine’s page size (e.g. 16 for SGLang).

§health: HealthCheckUpdate

Per-worker health check overrides (partial — only Some fields override router defaults).

§max_connection_attempts: u32

Maximum connection attempts during worker registration (default: 20).

§load_monitor_interval_secs: Option<u64>

Per-worker load monitor interval override (seconds). When set, workers in the same group use this interval for load polling. When unset, falls back to the global load_monitor_interval_secs from router config.

Struct WorkerSpec

Fields§

Implementations§

impl WorkerSpec

pub fn new(url: impl Into<String>) -> Self

Trait Implementations§

impl Clone for WorkerSpec

fn clone(&self) -> WorkerSpec

fn clone_from(&mut self, source: &Self)

impl Debug for WorkerSpec

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<'de> Deserialize<'de> for WorkerSpec

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl JsonSchema for WorkerSpec

fn schema_name() -> String

fn schema_id() -> Cow<'static, str>

fn json_schema(generator: &mut SchemaGenerator) -> Schema

fn is_referenceable() -> bool

impl Serialize for WorkerSpec

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

Auto Trait Implementations§

impl Freeze for WorkerSpec

impl RefUnwindSafe for WorkerSpec

impl Send for WorkerSpec

impl Sync for WorkerSpec

impl Unpin for WorkerSpec

impl UnsafeUnpin for WorkerSpec

impl UnwindSafe for WorkerSpec

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> DynClone for T where T: Clone,

fn __clone_box(&self, _: Private) -> *mut ()

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Struct WorkerSpec

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> DynClone for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,