Struct InferStatistics

Source

/// Inference statistics.
///
/// Each field carries a count together with a cumulative duration for one
/// request outcome or processing stage.
pub struct InferStatistics {
    /// Cumulative count and duration for successful inference requests.
    /// Includes cache hits.
    pub success: Option<StatisticDuration>,
    /// Cumulative count and duration for failed inference requests.
    pub fail: Option<StatisticDuration>,
    /// Count and cumulative duration that inference requests wait in
    /// scheduling or other queues. Includes cache hits.
    pub queue: Option<StatisticDuration>,
    /// Count and cumulative duration to prepare input tensor data as required
    /// by the model framework / backend (for example, copying input tensor
    /// data to the GPU). Does not account for requests that were a cache hit.
    pub compute_input: Option<StatisticDuration>,
    /// Count and cumulative duration to execute the model. Does not account
    /// for requests that were a cache hit.
    pub compute_infer: Option<StatisticDuration>,
    /// Count and cumulative duration to extract output tensor data produced by
    /// the model framework / backend (for example, copying output tensor data
    /// from the GPU). Does not account for requests that were a cache hit.
    pub compute_output: Option<StatisticDuration>,
    /// Count of response cache hits and cumulative duration to look up and
    /// extract output tensor data from the Response Cache on a cache hit.
    pub cache_hit: Option<StatisticDuration>,
    /// Count of response cache misses and cumulative duration to look up and
    /// insert output tensor data from the computed response into the cache.
    pub cache_miss: Option<StatisticDuration>,
}

Expand description

message InferStatistics — Inference statistics.

Fields§

§success: Option<StatisticDuration>

StatisticDuration success — Cumulative count and duration for successful inference requests. The “success” count and cumulative duration include cache hits.

§fail: Option<StatisticDuration>

StatisticDuration fail — Cumulative count and duration for failed inference requests.

§queue: Option<StatisticDuration>

StatisticDuration queue — The count and cumulative duration that inference requests wait in scheduling or other queues. The “queue” count and cumulative duration include cache hits.

§compute_input: Option<StatisticDuration>

StatisticDuration compute_input — The count and cumulative duration to prepare input tensor data as required by the model framework / backend. For example, this duration should include the time to copy input tensor data to the GPU. The “compute_input” count and cumulative duration do not account for requests that were a cache hit. See the “cache_hit” field for more info.

§compute_infer: Option<StatisticDuration>

StatisticDuration compute_infer — The count and cumulative duration to execute the model. The “compute_infer” count and cumulative duration do not account for requests that were a cache hit. See the “cache_hit” field for more info.

§compute_output: Option<StatisticDuration>

StatisticDuration compute_output — The count and cumulative duration to extract output tensor data produced by the model framework / backend. For example, this duration should include the time to copy output tensor data from the GPU. The “compute_output” count and cumulative duration do not account for requests that were a cache hit. See the “cache_hit” field for more info.

§cache_hit: Option<StatisticDuration>

StatisticDuration cache_hit — The count of response cache hits and the cumulative duration to look up and extract output tensor data from the Response Cache on a cache hit. For example, this duration should include the time to copy output tensor data from the Response Cache to the response object. On cache hits, Triton does not need to go to the model/backend for the output tensor data, so the “compute_input”, “compute_infer”, and “compute_output” fields are not updated. Assuming the response cache is enabled for a given model, a cache hit occurs for a request to that model when the request metadata (model name, model version, model inputs) hashes to an existing entry in the cache. On a cache miss, the request hash and response output tensor data are added to the cache. See the response cache docs for more info: https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md

§cache_miss: Option<StatisticDuration>

StatisticDuration cache_miss — The count of response cache misses and the cumulative duration to look up and insert output tensor data from the computed response into the cache. For example, this duration should include the time to copy output tensor data from the response object to the Response Cache. Assuming the response cache is enabled for a given model, a cache miss occurs for a request to that model when the request metadata does NOT hash to an existing entry in the cache. See the response cache docs for more info: https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md

Struct InferStatistics

Fields§

Trait Implementations§

impl Clone for InferStatistics

fn clone(&self) -> InferStatistics

fn clone_from(&mut self, source: &Self)

impl Debug for InferStatistics

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for InferStatistics

fn default() -> Self

impl Message for InferStatistics

fn encoded_len(&self) -> usize

fn clear(&mut self)

fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError> where B: BufMut, Self: Sized,

fn encode_to_vec(&self) -> Vec<u8> ⓘ where Self: Sized,

fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError> where B: BufMut, Self: Sized,

fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘ where Self: Sized,

fn decode<B>(buf: B) -> Result<Self, DecodeError> where B: Buf, Self: Default,

fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError> where B: Buf, Self: Default,

fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError> where B: Buf, Self: Sized,

fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError> where B: Buf, Self: Sized,

impl PartialEq for InferStatistics

fn eq(&self, other: &InferStatistics) -> bool

fn ne(&self, other: &Rhs) -> bool

impl StructuralPartialEq for InferStatistics

Auto Trait Implementations§

impl Freeze for InferStatistics

impl RefUnwindSafe for InferStatistics

impl Send for InferStatistics

impl Sync for InferStatistics

impl Unpin for InferStatistics

impl UnwindSafe for InferStatistics

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoRequest<T> for T

fn into_request(self) -> Request<T>

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct InferStatistics

fn encode<B>(&self, buf: &mut B) -> Result<(), EncodeError>
where B: BufMut, Self: Sized,

fn encode_to_vec(&self) -> Vec<u8> ⓘ
where Self: Sized,

fn encode_length_delimited<B>(&self, buf: &mut B) -> Result<(), EncodeError>
where B: BufMut, Self: Sized,

fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘ
where Self: Sized,

fn decode<B>(buf: B) -> Result<Self, DecodeError>
where B: Buf, Self: Default,

fn decode_length_delimited<B>(buf: B) -> Result<Self, DecodeError>
where B: Buf, Self: Default,

fn merge<B>(&mut self, buf: B) -> Result<(), DecodeError>
where B: Buf, Self: Sized,

fn merge_length_delimited<B>(&mut self, buf: B) -> Result<(), DecodeError>
where B: Buf, Self: Sized,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,