pub struct ModelDynamicBatching {
pub preferred_batch_size: Vec<i32>,
pub max_queue_delay_microseconds: u64,
pub preserve_ordering: bool,
pub priority_levels: u32,
pub default_priority_level: u32,
pub default_queue_policy: Option<ModelQueuePolicy>,
pub priority_queue_policy: HashMap<u32, ModelQueuePolicy>,
}Expand description
Protobuf message `ModelDynamicBatching`: dynamic batching configuration. These settings control how dynamic batching operates for the model.
Fields§
§preferred_batch_size: Vec<i32>
Protobuf field: `repeated int32 preferred_batch_size`. Preferred batch sizes for dynamic batching. If a batch of one of these sizes can be formed, it will be executed immediately. If not specified, a preferred batch size will be chosen automatically based on model and GPU characteristics.
max_queue_delay_microseconds: u64
Protobuf field: `uint64 max_queue_delay_microseconds`. The maximum time, in microseconds, a request will be delayed in the scheduling queue to wait for additional requests for batching. Default is 0.
preserve_ordering: bool
Protobuf field: `bool preserve_ordering`. Whether the dynamic batcher should preserve the ordering of responses to match the order of requests received by the scheduler. Default is false. If true, the responses will be returned in the same order as the order of requests sent to the scheduler. If false, the responses may be returned in arbitrary order. This option is specifically needed when a sequence of related inference requests (i.e. inference requests with the same correlation ID) is sent to the dynamic batcher, to ensure that the sequence responses are in the correct order.
priority_levels: u32
Protobuf field: `uint32 priority_levels`. The number of priority levels to be enabled for the model. Priority levels start at 1, and 1 is the highest priority. Requests are handled in priority order, with all priority 1 requests processed before priority 2, all priority 2 requests processed before priority 3, etc. Requests with the same priority level will be handled in the order that they are received.
default_priority_level: u32
Protobuf field: `uint32 default_priority_level`. The priority level used for requests that don't specify their priority. The value must be in the range [1, `priority_levels`].
default_queue_policy: Option<ModelQueuePolicy>
Protobuf field: `ModelQueuePolicy default_queue_policy`. The default queue policy used for requests that don't require priority handling and for requests that specify priority levels where there is no specific policy given. If not specified, a policy with default field values will be used.
priority_queue_policy: HashMap<u32, ModelQueuePolicy>
Protobuf field: `map<uint32, ModelQueuePolicy> priority_queue_policy`. Specifies the queue policy for each priority level. The default queue policy will be used if a priority level doesn't specify a queue policy.
Trait Implementations§
Source§impl Clone for ModelDynamicBatching
impl Clone for ModelDynamicBatching
Source§fn clone(&self) -> ModelDynamicBatching
fn clone(&self) -> ModelDynamicBatching
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for ModelDynamicBatching
impl Debug for ModelDynamicBatching
Source§impl Default for ModelDynamicBatching
impl Default for ModelDynamicBatching
Source§impl Message for ModelDynamicBatching
impl Message for ModelDynamicBatching
Source§fn encoded_len(&self) -> usize
fn encoded_len(&self) -> usize
Source§fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn decode<B>(buf: B) -> Result<Self, DecodeError>
fn decode<B>(buf: B) -> Result<Self, DecodeError>
Source§fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
Source§fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
Decodes an instance of the message from a buffer, and merges it into `self`. Read more
Source§fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
Decodes a length-delimited instance of the message from buffer, and merges it into `self`.
Source§impl PartialEq for ModelDynamicBatching
impl PartialEq for ModelDynamicBatching
impl StructuralPartialEq for ModelDynamicBatching
Auto Trait Implementations§
impl Freeze for ModelDynamicBatching
impl RefUnwindSafe for ModelDynamicBatching
impl Send for ModelDynamicBatching
impl Sync for ModelDynamicBatching
impl Unpin for ModelDynamicBatching
impl UnwindSafe for ModelDynamicBatching
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message `T` in a `tonic::Request`