pub struct EngineMetrics {
pub tokens_generated: AtomicU64,
pub tokens_prefilled: AtomicU64,
pub kv_cache_hits: AtomicU64,
pub kv_cache_misses: AtomicU64,
pub decode_nanos: AtomicU64,
pub prefill_nanos: AtomicU64,
pub requests_started: AtomicU64,
pub requests_completed: AtomicU64,
}
Expand description
Thread-safe metrics counters for an inference engine.
Fields§
tokens_generated: AtomicU64 — Total tokens generated (decode phase).
tokens_prefilled: AtomicU64 — Total tokens processed in prefill.
kv_cache_hits: AtomicU64 — Total KV cache hits (prefix/page cache returns a cached slot).
kv_cache_misses: AtomicU64 — Total KV cache misses.
decode_nanos: AtomicU64 — Total decode time in nanoseconds.
prefill_nanos: AtomicU64 — Total prefill time in nanoseconds.
requests_started: AtomicU64 — Number of requests started.
requests_completed: AtomicU64 — Number of requests completed.
Implementations§
Source§impl EngineMetrics
impl EngineMetrics
Sourcepub fn record_decode_token(&self, elapsed: Duration)
pub fn record_decode_token(&self, elapsed: Duration)
Record a single decode token and the time taken to produce it.
Sourcepub fn record_prefill(&self, n_tokens: u64, elapsed: Duration)
pub fn record_prefill(&self, n_tokens: u64, elapsed: Duration)
Record a prefill phase that processed n_tokens prompt tokens.
Sourcepub fn record_kv_hit(&self)
pub fn record_kv_hit(&self)
Record a KV-cache hit (prefix or page reuse).
Sourcepub fn record_kv_miss(&self)
pub fn record_kv_miss(&self)
Record a KV-cache miss (full prefill required).
Sourcepub fn record_request_start(&self)
pub fn record_request_start(&self)
Record that a new inference request has started.
Sourcepub fn record_request_complete(&self)
pub fn record_request_complete(&self)
Record that an inference request has completed.
Sourcepub fn throughput(&self) -> (f64, f64)
pub fn throughput(&self) -> (f64, f64)
Returns (decode_tokens_per_sec, prefill_tokens_per_sec).
Both values are 0.0 if no time has been recorded for that phase.
Sourcepub fn kv_cache_hit_rate(&self) -> f64
pub fn kv_cache_hit_rate(&self) -> f64
Returns KV cache hit rate in the range [0.0, 1.0].
Returns 0.0 when no lookups have been recorded.
Sourcepub fn snapshot(&self) -> MetricsSnapshot
pub fn snapshot(&self) -> MetricsSnapshot
Returns a point-in-time snapshot of all counters.
Trait Implementations§
Source§impl Debug for EngineMetrics
impl Debug for EngineMetrics
Source§impl Default for EngineMetrics
impl Default for EngineMetrics
Source§fn default() -> EngineMetrics
fn default() -> EngineMetrics
Auto Trait Implementations§
impl !Freeze for EngineMetrics
impl RefUnwindSafe for EngineMetrics
impl Send for EngineMetrics
impl Sync for EngineMetrics
impl Unpin for EngineMetrics
impl UnsafeUnpin for EngineMetrics
impl UnwindSafe for EngineMetrics
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more