pub struct ModelInstanceGroup {
pub name: String,
pub kind: i32,
pub count: i32,
pub rate_limiter: Option<ModelRateLimiter>,
pub gpus: Vec<i32>,
pub secondary_devices: Vec<SecondaryDevice>,
pub profile: Vec<String>,
pub passive: bool,
pub host_policy: String,
}
Expand description
@@ @@.. cpp:var:: message ModelInstanceGroup @@ @@ A group of one or more instances of a model and resources made @@ available for those instances. @@
Fields§
§name: String
@@ .. cpp:var:: string name
@@
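@@ Optional name of this group of instances. If not specified the
@@ name will be formed as <model name>_<group number>. The name of
@@ individual instances will be further formed by a unique instance
@@ number and GPU index.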
kind: i32
@@ .. cpp:var:: Kind kind @@ @@ The kind of this instance group. Default is KIND_AUTO. If @@ KIND_AUTO or KIND_GPU then both ‘count’ and ‘gpu’ are valid and @@ may be specified. If KIND_CPU or KIND_MODEL only ‘count’ is valid @@ and ‘gpu’ cannot be specified. @@
count: i32
@@ .. cpp:var:: int32 count @@ @@ For a group assigned to GPU, the number of instances created for @@ each GPU listed in ‘gpus’. For a group assigned to CPU the number @@ of instances created. Default is 1.
rate_limiter: Option<ModelRateLimiter>
@@ .. cpp:var:: ModelRateLimiter rate_limiter @@ @@ The rate limiter specific settings to be associated with this @@ instance group. Optional, if not specified no rate limiting @@ will be applied to this instance group. @@
gpus: Vec<i32>
@@ .. cpp:var:: int32 gpus (repeated) @@ @@ GPU(s) where instances should be available. For each GPU listed, @@ ‘count’ instances of the model will be available. Setting ‘gpus’ @@ to empty (or not specifying at all) is equivalent to listing all @@ available GPUs. @@
secondary_devices: Vec<SecondaryDevice>
@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated) @@ @@ Secondary devices that are required by instances specified by this @@ instance group. Optional. @@
profile: Vec<String>
@@ .. cpp:var:: string profile (repeated)
@@
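@@ For TensorRT models containing multiple optimization profiles, this
@@ parameter specifies a set of optimization profiles available to this
@@ instance group. The inference server will choose the optimal profile
@@ based on the shapes of the input tensors. This field should lie
@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
@@ and be specified only for TensorRT backend, otherwise an error will
@@ be generated. If not specified, the server will select the first
@@ optimization profile by default.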
passive: bool
@@ .. cpp:var:: bool passive @@ @@ Whether the instances within this instance group will be accepting @@ inference requests from the scheduler. If true, the instances will @@ not be added to the scheduler. Default value is false. @@
host_policy: String
@@ .. cpp:var:: string host_policy @@ @@ The host policy name that the instance is to be associated with. @@ The default value is set to reflect the device kind of the instance, @@ for instance, KIND_CPU is “cpu”, KIND_MODEL is “model” and @@ KIND_GPU is “gpu_<gpu_id>”. @@
Implementations§
Trait Implementations§
Source§impl Clone for ModelInstanceGroup
impl Clone for ModelInstanceGroup
Source§fn clone(&self) -> ModelInstanceGroup
fn clone(&self) -> ModelInstanceGroup
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for ModelInstanceGroup
impl Debug for ModelInstanceGroup
Source§impl Default for ModelInstanceGroup
impl Default for ModelInstanceGroup
Source§impl Message for ModelInstanceGroup
impl Message for ModelInstanceGroup
Source§fn encoded_len(&self) -> usize
fn encoded_len(&self) -> usize
Source§fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn decode<B>(buf: B) -> Result<Self, DecodeError>
fn decode<B>(buf: B) -> Result<Self, DecodeError>
Source§fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
Source§fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
Decodes an instance of the message from a buffer, and merges it into self. Read more
Source§fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
Decodes a length-delimited instance of the message from buffer, and merges it into self.
Source§impl PartialEq for ModelInstanceGroup
impl PartialEq for ModelInstanceGroup
impl StructuralPartialEq for ModelInstanceGroup
Auto Trait Implementations§
impl Freeze for ModelInstanceGroup
impl RefUnwindSafe for ModelInstanceGroup
impl Send for ModelInstanceGroup
impl Sync for ModelInstanceGroup
impl Unpin for ModelInstanceGroup
impl UnwindSafe for ModelInstanceGroup
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request