/// Optimization settings for a model. These settings control if/how a
/// model is optimized and prioritized by the backend framework when
/// it is loaded. (Generated from the `ModelOptimizationPolicy` protobuf
/// message; presumably prost-generated — field docs below mirror the
/// original proto comments.)
pub struct ModelOptimizationPolicy {
/// The graph optimization setting for the model. Optional.
pub graph: Option<Graph>,
/// The priority setting for the model. Optional.
/// Stored as the raw i32 wire value of the `ModelPriority` enum;
/// use `priority()` / `set_priority()` for typed access (the getter
/// falls back to the default on an invalid enum value).
pub priority: i32,
/// CUDA-specific optimization settings. Optional.
pub cuda: Option<Cuda>,
/// The accelerators used for the model. Optional.
pub execution_accelerators: Option<ExecutionAccelerators>,
/// Use pinned memory buffer when the data transfer for inputs
/// is between GPU memory and non-pinned system memory.
/// Default is true.
pub input_pinned_memory: Option<PinnedMemoryBuffer>,
/// Use pinned memory buffer when the data transfer for outputs
/// is between GPU memory and non-pinned system memory.
/// Default is true.
pub output_pinned_memory: Option<PinnedMemoryBuffer>,
/// The backend may use a gather kernel to gather input data if the
/// device has direct access to the source and destination buffers.
/// The kernel is used only when the number of buffers to gather is
/// greater than or equal to this value; 0 disables it. Default is 0.
/// Currently only recognized by the TensorRT backend.
pub gather_kernel_buffer_threshold: u32,
/// Start preparing the next batch before the model instance is ready
/// for the next inference, overlapping batch preparation with model
/// execution at the cost of possibly smaller batches. Default is
/// false. Currently only recognized by the TensorRT backend.
pub eager_batching: bool,
}
Expand description
@@ @@.. cpp:var:: message ModelOptimizationPolicy @@ @@ Optimization settings for a model. These settings control if/how a @@ model is optimized and prioritized by the backend framework when @@ it is loaded. @@
Fields§
§graph: Option<Graph>
@@ .. cpp:var:: Graph graph @@ @@ The graph optimization setting for the model. Optional. @@
priority: i32
@@ .. cpp:var:: ModelPriority priority @@ @@ The priority setting for the model. Optional. @@
cuda: Option<Cuda>
@@ .. cpp:var:: Cuda cuda @@ @@ CUDA-specific optimization settings. Optional. @@
execution_accelerators: Option<ExecutionAccelerators>
@@ .. cpp:var:: ExecutionAccelerators execution_accelerators @@ @@ The accelerators used for the model. Optional. @@
input_pinned_memory: Option<PinnedMemoryBuffer>
@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory @@ @@ Use pinned memory buffer when the data transfer for inputs @@ is between GPU memory and non-pinned system memory. @@ Default is true. @@
output_pinned_memory: Option<PinnedMemoryBuffer>
@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory @@ @@ Use pinned memory buffer when the data transfer for outputs @@ is between GPU memory and non-pinned system memory. @@ Default is true. @@
gather_kernel_buffer_threshold: u32
@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold @@ @@ The backend may use a gather kernel to gather input data if the @@ device has direct access to the source buffer and the destination @@ buffer. In such case, the gather kernel will be used only if the @@ number of buffers to be gathered is greater or equal to @@ the specified value. If 0, the gather kernel will be disabled. @@ Default value is 0. @@ Currently only recognized by TensorRT backend. @@
eager_batching: bool
@@ .. cpp:var:: bool eager_batching @@ @@ Start preparing the next batch before the model instance is ready @@ for the next inference. This option can be used to overlap the @@ batch preparation with model execution, with the trade-off that @@ the next batch might be smaller than what it could have been. @@ Default value is false. @@ Currently only recognized by TensorRT backend. @@
Implementations§
Source§impl ModelOptimizationPolicy
impl ModelOptimizationPolicy
Sourcepub fn priority(&self) -> ModelPriority
pub fn priority(&self) -> ModelPriority
Returns the enum value of priority
, or the default if the field is set to an invalid enum value.
Sourcepub fn set_priority(&mut self, value: ModelPriority)
pub fn set_priority(&mut self, value: ModelPriority)
Sets priority
to the provided enum value.
Trait Implementations§
Source§impl Clone for ModelOptimizationPolicy
impl Clone for ModelOptimizationPolicy
Source§fn clone(&self) -> ModelOptimizationPolicy
fn clone(&self) -> ModelOptimizationPolicy
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source
. Read moreSource§impl Debug for ModelOptimizationPolicy
impl Debug for ModelOptimizationPolicy
Source§impl Default for ModelOptimizationPolicy
impl Default for ModelOptimizationPolicy
Source§impl Message for ModelOptimizationPolicy
impl Message for ModelOptimizationPolicy
Source§fn encoded_len(&self) -> usize
fn encoded_len(&self) -> usize
Source§fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn decode<B>(buf: B) -> Result<Self, DecodeError>
fn decode<B>(buf: B) -> Result<Self, DecodeError>
Source§fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
Source§fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
self
. Read moreSource§fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
self
.Source§impl PartialEq for ModelOptimizationPolicy
impl PartialEq for ModelOptimizationPolicy
impl StructuralPartialEq for ModelOptimizationPolicy
Auto Trait Implementations§
impl Freeze for ModelOptimizationPolicy
impl RefUnwindSafe for ModelOptimizationPolicy
impl Send for ModelOptimizationPolicy
impl Sync for ModelOptimizationPolicy
impl Unpin for ModelOptimizationPolicy
impl UnwindSafe for ModelOptimizationPolicy
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
T
in a tonic::Request