Struct ModelInstanceGroup

Source

pub struct ModelInstanceGroup {
    pub name: String,
    pub kind: i32,
    pub count: i32,
    pub rate_limiter: Option<ModelRateLimiter>,
    pub gpus: Vec<i32>,
    pub secondary_devices: Vec<SecondaryDevice>,
    pub profile: Vec<String>,
    pub passive: bool,
    pub host_policy: String,
}

Expand description

@@ @@.. cpp:var:: message ModelInstanceGroup @@ @@ A group of one or more instances of a model and resources made @@ available for those instances. @@

Fields§

§name: String

@@ .. cpp:var:: string name @@ @@ Optional name of this group of instances. If not specified the @@ name will be formed as <model name>_<group number>. The name of @@ individual instances will be further formed by a unique instance @@ number and GPU index: @@

§kind: i32

@@ .. cpp:var:: Kind kind @@ @@ The kind of this instance group. Default is KIND_AUTO. If @@ KIND_AUTO or KIND_GPU then both ‘count’ and ‘gpus’ are valid and @@ may be specified. If KIND_CPU or KIND_MODEL only ‘count’ is valid @@ and ‘gpus’ cannot be specified. @@

§count: i32

@@ .. cpp:var:: int32 count @@ @@ For a group assigned to GPU, the number of instances created for @@ each GPU listed in ‘gpus’. For a group assigned to CPU the number @@ of instances created. Default is 1.

§rate_limiter: Option<ModelRateLimiter>

@@ .. cpp:var:: ModelRateLimiter rate_limiter @@ @@ The rate limiter specific settings to be associated with this @@ instance group. Optional, if not specified no rate limiting @@ will be applied to this instance group. @@

§gpus: Vec<i32>

@@ .. cpp:var:: int32 gpus (repeated) @@ @@ GPU(s) where instances should be available. For each GPU listed, @@ ‘count’ instances of the model will be available. Setting ‘gpus’ @@ to empty (or not specifying at all) is equivalent to listing all @@ available GPUs. @@

§secondary_devices: Vec<SecondaryDevice>

@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated) @@ @@ Secondary devices that are required by instances specified by this @@ instance group. Optional. @@

§profile: Vec<String>

@@ .. cpp:var:: string profile (repeated) @@ @@ For TensorRT models containing multiple optimization profiles, this @@ parameter specifies a set of optimization profiles available to this @@ instance group. The inference server will choose the optimal profile @@ based on the shapes of the input tensors. This field should lie @@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1 @@ and be specified only for TensorRT backend, otherwise an error will @@ be generated. If not specified, the server will select the first @@ optimization profile by default. @@

§passive: bool

@@ .. cpp:var:: bool passive @@ @@ Whether the instances within this instance group will be accepting @@ inference requests from the scheduler. If true, the instances will @@ not be added to the scheduler. Default value is false. @@

§host_policy: String

@@ .. cpp:var:: string host_policy @@ @@ The name of the host policy that the instance is to be associated with. @@ The default value is set to reflect the device kind of the instance, @@ for instance, KIND_CPU is “cpu”, KIND_MODEL is “model” and @@ KIND_GPU is “gpu_<gpu_id>”. @@

Struct ModelInstanceGroup

Fields§

Implementations§

impl ModelInstanceGroup

pub fn kind(&self) -> Kind

pub fn set_kind(&mut self, value: Kind)

Trait Implementations§

impl Clone for ModelInstanceGroup

fn clone(&self) -> ModelInstanceGroup

fn clone_from(&mut self, source: &Self)

impl Debug for ModelInstanceGroup

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for ModelInstanceGroup

fn default() -> Self

impl Message for ModelInstanceGroup

fn encoded_len(&self) -> usize

fn clear(&mut self)

fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError> where B: BufMut, Self: Sized,

fn encode_to_vec(&self) -> Vec<u8> ⓘ where Self: Sized,

fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError> where B: BufMut, Self: Sized,

fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘ where Self: Sized,

fn decode<B>(buf: B) -> Result<Self, DecodeError> where B: Buf, Self: Default,

fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError> where B: Buf, Self: Default,

fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError> where B: Buf, Self: Sized,

fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError> where B: Buf, Self: Sized,

impl PartialEq for ModelInstanceGroup

fn eq(&self, other: &ModelInstanceGroup) -> bool

fn ne(&self, other: &Rhs) -> bool

impl StructuralPartialEq for ModelInstanceGroup

Auto Trait Implementations§

impl Freeze for ModelInstanceGroup

impl RefUnwindSafe for ModelInstanceGroup

impl Send for ModelInstanceGroup

impl Sync for ModelInstanceGroup

impl Unpin for ModelInstanceGroup

impl UnwindSafe for ModelInstanceGroup

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoRequest<T> for T

fn into_request(self) -> Request<T>

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct ModelInstanceGroup

fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
where B: BufMut, Self: Sized,

fn encode_to_vec(&self) -> Vec<u8> ⓘ
where Self: Sized,

fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
where B: BufMut, Self: Sized,

fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘ
where Self: Sized,

fn decode<B>(buf: B) -> Result<Self, DecodeError>
where B: Buf, Self: Default,

fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
where B: Buf, Self: Default,

fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
where B: Buf, Self: Sized,

fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
where B: Buf, Self: Sized,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,