/// Optimization settings for a model. These settings control if/how a
/// model is optimized and prioritized by the backend framework when
/// it is loaded. (Generated from the `ModelOptimizationPolicy` protobuf
/// message; presumably prost-generated — field docs below mirror the
/// original proto comments.)
pub struct ModelOptimizationPolicy {
/// The graph optimization setting for the model. Optional.
pub graph: Option<Graph>,
/// The priority setting for the model. Optional.
/// Stored as the raw i32 wire value of the `ModelPriority` enum;
/// use `priority()` / `set_priority()` for typed access (the getter
/// falls back to the default on an invalid enum value).
pub priority: i32,
/// CUDA-specific optimization settings. Optional.
pub cuda: Option<Cuda>,
/// The accelerators used for the model. Optional.
pub execution_accelerators: Option<ExecutionAccelerators>,
/// Use pinned memory buffer when the data transfer for inputs
/// is between GPU memory and non-pinned system memory.
/// Default is true.
pub input_pinned_memory: Option<PinnedMemoryBuffer>,
/// Use pinned memory buffer when the data transfer for outputs
/// is between GPU memory and non-pinned system memory.
/// Default is true.
pub output_pinned_memory: Option<PinnedMemoryBuffer>,
/// The backend may use a gather kernel to gather input data if the
/// device has direct access to the source and destination buffers.
/// The kernel is used only when the number of buffers to gather is
/// greater than or equal to this value; 0 disables it. Default is 0.
/// Currently only recognized by the TensorRT backend.
pub gather_kernel_buffer_threshold: u32,
/// Start preparing the next batch before the model instance is ready
/// for the next inference, overlapping batch preparation with model
/// execution at the cost of possibly smaller batches. Default is
/// false. Currently only recognized by the TensorRT backend.
pub eager_batching: bool,
}
Expand description
@@ @@.. cpp:var:: message ModelOptimizationPolicy @@ @@ Optimization settings for a model. These settings control if/how a @@ model is optimized and prioritized by the backend framework when @@ it is loaded. @@
Fields§
§graph: Option<Graph>
@@ .. cpp:var:: Graph graph @@ @@ The graph optimization setting for the model. Optional. @@
priority: i32
@@ .. cpp:var:: ModelPriority priority @@ @@ The priority setting for the model. Optional. @@
cuda: Option<Cuda>
@@ .. cpp:var:: Cuda cuda @@ @@ CUDA-specific optimization settings. Optional. @@
execution_accelerators: Option<ExecutionAccelerators>
@@ .. cpp:var:: ExecutionAccelerators execution_accelerators @@ @@ The accelerators used for the model. Optional. @@
input_pinned_memory: Option<PinnedMemoryBuffer>
@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory @@ @@ Use pinned memory buffer when the data transfer for inputs @@ is between GPU memory and non-pinned system memory. @@ Default is true. @@
output_pinned_memory: Option<PinnedMemoryBuffer>
@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory @@ @@ Use pinned memory buffer when the data transfer for outputs @@ is between GPU memory and non-pinned system memory. @@ Default is true. @@
gather_kernel_buffer_threshold: u32
@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold @@ @@ The backend may use a gather kernel to gather input data if the @@ device has direct access to the source buffer and the destination @@ buffer. In such case, the gather kernel will be used only if the @@ number of buffers to be gathered is greater or equal to @@ the specified value. If 0, the gather kernel will be disabled. @@ Default value is 0. @@ Currently only recognized by TensorRT backend. @@
eager_batching: bool
@@ .. cpp:var:: bool eager_batching @@ @@ Start preparing the next batch before the model instance is ready @@ for the next inference. This option can be used to overlap the @@ batch preparation with model execution, with the trade-off that @@ the next batch might be smaller than what it could have been. @@ Default value is false. @@ Currently only recognized by TensorRT backend. @@
Implementations§
Source§impl ModelOptimizationPolicy
impl ModelOptimizationPolicy
Sourcepub fn priority(&self) -> ModelPriority
pub fn priority(&self) -> ModelPriority
Returns the enum value of priority
, or the default if the field is set to an invalid enum value.
Sourcepub fn set_priority(&mut self, value: ModelPriority)
pub fn set_priority(&mut self, value: ModelPriority)
Sets priority
to the provided enum value.
Trait Implementations§
Source§impl Clone for ModelOptimizationPolicy
impl Clone for ModelOptimizationPolicy
Source§fn clone(&self) -> ModelOptimizationPolicy
fn clone(&self) -> ModelOptimizationPolicy
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source
. Read moreSource§impl Debug for ModelOptimizationPolicy
impl Debug for ModelOptimizationPolicy
Source§impl Default for ModelOptimizationPolicy
impl Default for ModelOptimizationPolicy
Source§impl Message for ModelOptimizationPolicy
impl Message for ModelOptimizationPolicy
Source§fn encoded_len(&self) -> usize
fn encoded_len(&self) -> usize
Source§fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn decode<B>(buf: B) -> Result<Self, DecodeError>
fn decode<B>(buf: B) -> Result<Self, DecodeError>
Source§fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
Source§fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
self
. Read moreSource§fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
self
.Source§impl PartialEq for ModelOptimizationPolicy
impl PartialEq for ModelOptimizationPolicy
impl StructuralPartialEq for ModelOptimizationPolicy
Auto Trait Implementations§
impl Freeze for ModelOptimizationPolicy
impl RefUnwindSafe for ModelOptimizationPolicy
impl Send for ModelOptimizationPolicy
impl Sync for ModelOptimizationPolicy
impl Unpin for ModelOptimizationPolicy
impl UnwindSafe for ModelOptimizationPolicy
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
T
in a tonic::Request