pub struct ExecutionAccelerators {
pub gpu_execution_accelerator: Vec<Accelerator>,
pub cpu_execution_accelerator: Vec<Accelerator>,
}
Specify the preferred execution accelerators to be used to execute the model. Currently only recognized by the ONNX Runtime backend and the TensorFlow backend.

For the ONNX Runtime backend, the model is deployed with the execution accelerators in priority order. The priority is determined by the order in which the accelerators are set, i.e. the provider at the front has the highest priority. Overall, the priority is:

    <gpu_execution_accelerator> (if the instance is on GPU)
    CUDA Execution Provider (if the instance is on GPU)
    <cpu_execution_accelerator>
    Default CPU Execution Provider
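For example, an ONNX Runtime model instance could prefer TensorRT on GPU and OpenVINO on CPU. A minimal sketch of building the message with the prost-generated types, assuming the companion Accelerator struct exposes name and parameters fields generated from the proto definition:

use std::collections::HashMap;

let accels = ExecutionAccelerators {
    // Tried first when the instance runs on GPU; the CUDA Execution
    // Provider is the implicit fallback after it.
    gpu_execution_accelerator: vec![Accelerator {
        name: "tensorrt".to_string(),
        parameters: HashMap::new(), // no parameters required for ONNX Runtime
    }],
    // Tried before the default CPU Execution Provider.
    cpu_execution_accelerator: vec![Accelerator {
        name: "openvino".to_string(),
        parameters: HashMap::new(),
    }],
};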
Fields
gpu_execution_accelerator: Vec<Accelerator>
The preferred execution provider to be used if the model instance is deployed on GPU.

For the ONNX Runtime backend, the only possible value is "tensorrt" as the name, and no parameters are required.

For the TensorFlow backend, possible values are "tensorrt", "auto_mixed_precision", and "gpu_io".

For "tensorrt", the following parameters can be specified:

    "precision_mode": The precision used for optimization. Allowed values are "FP32" and "FP16". Default value is "FP32".
    "max_cached_engines": The maximum number of cached TensorRT engines in dynamic TensorRT ops. Default value is 100.
    "minimum_segment_size": The smallest model subgraph that will be considered for optimization by TensorRT. Default value is 3.
    "max_workspace_size_bytes": The maximum GPU memory the model can use temporarily during execution. Default value is 1GB.

For "auto_mixed_precision", no parameters are required. If set, the model will try to use FP16 for better performance. This optimization cannot be combined with "tensorrt".

For "gpu_io", no parameters are required. If set, the model will be executed using the TensorFlow Callable API to place input and output tensors in GPU memory if possible, which can reduce data-transfer overhead if the model is used in an ensemble. However, the Callable object is created at model creation and requests all outputs on every model execution, which may impact performance if a request does not require all outputs. This optimization only takes effect if the model instance is created with KIND_GPU.
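As a sketch of how the TensorFlow "tensorrt" options above are passed, each parameter is a plain string key/value pair in the accelerator's parameters map (the Accelerator field names are assumed from the prost-generated struct):

use std::collections::HashMap;

let tensorrt = Accelerator {
    name: "tensorrt".to_string(),
    parameters: HashMap::from([
        ("precision_mode".to_string(), "FP16".to_string()),
        ("max_cached_engines".to_string(), "100".to_string()),
        ("minimum_segment_size".to_string(), "3".to_string()),
        // the documented 1GB default, expressed in bytes as a string
        ("max_workspace_size_bytes".to_string(), "1073741824".to_string()),
    ]),
};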
cpu_execution_accelerator: Vec<Accelerator>
The preferred execution provider to be used if the model instance is deployed on CPU.

For the ONNX Runtime backend, the only possible value is "openvino" as the name, and no parameters are required.
Trait Implementations

impl Clone for ExecutionAccelerators
    fn clone(&self) -> ExecutionAccelerators
    fn clone_from(&mut self, source: &Self)

impl Debug for ExecutionAccelerators

impl Default for ExecutionAccelerators

impl Message for ExecutionAccelerators
    fn encoded_len(&self) -> usize
    fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
    fn encode_to_vec(&self) -> Vec<u8> where Self: Sized
    fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
    fn encode_length_delimited_to_vec(&self) -> Vec<u8> where Self: Sized
    fn decode<B>(buf: B) -> Result<Self, DecodeError>
    fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
    fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
    fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
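Because ExecutionAccelerators implements prost's Message trait, it can be round-tripped through the protobuf wire format. A minimal sketch:

use prost::Message;

let accels = ExecutionAccelerators::default();
// Encode into a freshly allocated buffer ...
let bytes = accels.encode_to_vec();
// ... and decode it back; `decode` accepts any `bytes::Buf`, and `&[u8]`
// implements it.
let decoded = ExecutionAccelerators::decode(bytes.as_slice()).expect("valid wire format");
assert_eq!(accels, decoded);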
impl PartialEq for ExecutionAccelerators
impl StructuralPartialEq for ExecutionAccelerators
Auto Trait Implementations
impl Freeze for ExecutionAccelerators
impl RefUnwindSafe for ExecutionAccelerators
impl Send for ExecutionAccelerators
impl Sync for ExecutionAccelerators
impl Unpin for ExecutionAccelerators
impl UnwindSafe for ExecutionAccelerators
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
    fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone
impl<T> Instrument for T
    fn instrument(self, span: Span) -> Instrumented<Self>
    fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoRequest<T> for T
    fn into_request(self) -> Request<T>
        Wrap the input message T in a tonic::Request.
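Through this blanket impl, the message can be handed to a tonic-generated client method that takes impl IntoRequest<ExecutionAccelerators>, or wrapped explicitly. A minimal sketch:

use tonic::IntoRequest;

let accels = ExecutionAccelerators::default();
// Wraps the message in a tonic::Request with empty metadata.
let request: tonic::Request<ExecutionAccelerators> = accels.into_request();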