/// Inference statistics.
///
/// Every field pairs a count with a cumulative duration for one phase or
/// outcome of inference request handling.
pub struct InferStatistics {
    /// Cumulative count and duration for successful inference requests.
    /// Cache hits are included in this count and duration.
    pub success: Option<StatisticDuration>,

    /// Cumulative count and duration for failed inference requests.
    pub fail: Option<StatisticDuration>,

    /// Count and cumulative duration that inference requests wait in
    /// scheduling or other queues. Cache hits are included.
    pub queue: Option<StatisticDuration>,

    /// Count and cumulative duration to prepare input tensor data as required
    /// by the model framework / backend (for example, the time to copy input
    /// tensor data to the GPU). Requests that were a cache hit are not
    /// accounted for here; see `cache_hit`.
    pub compute_input: Option<StatisticDuration>,

    /// Count and cumulative duration to execute the model. Requests that were
    /// a cache hit are not accounted for here; see `cache_hit`.
    pub compute_infer: Option<StatisticDuration>,

    /// Count and cumulative duration to extract output tensor data produced
    /// by the model framework / backend (for example, the time to copy output
    /// tensor data from the GPU). Requests that were a cache hit are not
    /// accounted for here; see `cache_hit`.
    pub compute_output: Option<StatisticDuration>,

    /// Count of response cache hits and cumulative duration to look up and
    /// extract output tensor data from the Response Cache on a cache hit.
    /// On a hit, `compute_input`, `compute_infer` and `compute_output` are
    /// not updated.
    pub cache_hit: Option<StatisticDuration>,

    /// Count of response cache misses and cumulative duration to look up and
    /// insert output tensor data from the computed response into the cache.
    pub cache_miss: Option<StatisticDuration>,
}
Expand description
Inference statistics.
Fields§
§success: Option<StatisticDuration>
Cumulative count and duration for successful inference requests. The “success” count and cumulative duration include cache hits.
fail: Option<StatisticDuration>
Cumulative count and duration for failed inference requests.
queue: Option<StatisticDuration>
The count and cumulative duration that inference requests wait in scheduling or other queues. The “queue” count and cumulative duration include cache hits.
compute_input: Option<StatisticDuration>
The count and cumulative duration to prepare input tensor data as required by the model framework / backend. For example, this duration should include the time to copy input tensor data to the GPU. The “compute_input” count and cumulative duration do not account for requests that were a cache hit. See the “cache_hit” field for more info.
compute_infer: Option<StatisticDuration>
The count and cumulative duration to execute the model. The “compute_infer” count and cumulative duration do not account for requests that were a cache hit. See the “cache_hit” field for more info.
compute_output: Option<StatisticDuration>
The count and cumulative duration to extract output tensor data produced by the model framework / backend. For example, this duration should include the time to copy output tensor data from the GPU. The “compute_output” count and cumulative duration do not account for requests that were a cache hit. See the “cache_hit” field for more info.
cache_hit: Option<StatisticDuration>
The count of response cache hits and cumulative duration to look up and extract output tensor data from the Response Cache on a cache hit. For example, this duration should include the time to copy output tensor data from the Response Cache to the response object. On cache hits, Triton does not need to go to the model/backend for the output tensor data, so the “compute_input”, “compute_infer”, and “compute_output” fields are not updated. Assuming the response cache is enabled for a given model, a cache hit occurs for a request to that model when the request metadata (model name, model version, model inputs) hashes to an existing entry in the cache. On a cache miss, the request hash and response output tensor data are added to the cache. See the response cache docs for more info: https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
cache_miss: Option<StatisticDuration>
The count of response cache misses and cumulative duration to look up and insert output tensor data from the computed response into the cache. For example, this duration should include the time to copy output tensor data from the response object to the Response Cache. Assuming the response cache is enabled for a given model, a cache miss occurs for a request to that model when the request metadata does NOT hash to an existing entry in the cache. See the response cache docs for more info: https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
Trait Implementations§
Source§impl Clone for InferStatistics
impl Clone for InferStatistics
Source§fn clone(&self) -> InferStatistics
fn clone(&self) -> InferStatistics
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for InferStatistics
impl Debug for InferStatistics
Source§impl Default for InferStatistics
impl Default for InferStatistics
Source§impl Message for InferStatistics
impl Message for InferStatistics
Source§fn encoded_len(&self) -> usize
fn encoded_len(&self) -> usize
Source§fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
Source§fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn decode<B>(buf: B) -> Result<Self, DecodeError>
fn decode<B>(buf: B) -> Result<Self, DecodeError>
Source§fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
Source§fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
Decodes an instance of the message from a buffer, and merges it into `self`. Read more
Source§fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
Decodes a length-delimited instance of the message from buffer, and merges it into `self`.
Source§impl PartialEq for InferStatistics
impl PartialEq for InferStatistics
impl StructuralPartialEq for InferStatistics
Auto Trait Implementations§
impl Freeze for InferStatistics
impl RefUnwindSafe for InferStatistics
impl Send for InferStatistics
impl Sync for InferStatistics
impl Unpin for InferStatistics
impl UnwindSafe for InferStatistics
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message `T` in a `tonic::Request`