1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
//! Storage operation metrics for monitoring and debugging.
//!
//! Provides latency tracking for critical storage operations:
//! - `ensure_capacity`: mmap resize operations (P0 - critical for P99 latency)
//!
//! # P0 Audit Recommendation
//!
//! The `ensure_capacity` operation can cause "stop-the-world" pauses during
//! large resizes (e.g., 2GB → 4GB). Monitoring P99 latency is essential.
//!
//! # PERF-001: Lock-Free Implementation
//!
//! Uses `LockFreeHistogram` for wait-free latency recording in the hot path.
//! No mutex contention even under high concurrency.
use super::histogram::LockFreeHistogram;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
/// Collector for storage-operation metrics.
///
/// All methods take `&self`: the two counters are `AtomicU64`s accessed with
/// `Ordering::Relaxed`, and latency samples are delegated to a
/// [`LockFreeHistogram`] (PERF-001), so recording never takes a mutex here.
/// The counters and the histogram are updated independently of each other, so
/// a reader racing with a writer may briefly observe them out of step.
#[derive(Debug)]
pub struct StorageMetrics {
    /// Number of recorded `ensure_capacity` calls that reported a resize.
    resize_count: AtomicU64,
    /// Running sum of the `bytes_resized` values reported by resizing calls.
    total_bytes_resized: AtomicU64,
    /// PERF-001: histogram of `ensure_capacity` latencies, in microseconds.
    latency_histogram: LockFreeHistogram,
}

impl Default for StorageMetrics {
    /// Equivalent to [`StorageMetrics::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl StorageMetrics {
    /// Builds a collector with both counters at zero and a new histogram.
    #[must_use]
    pub fn new() -> Self {
        let latency_histogram = LockFreeHistogram::new();
        Self {
            latency_histogram,
            resize_count: AtomicU64::new(0),
            total_bytes_resized: AtomicU64::new(0),
        }
    }

    /// Records one `ensure_capacity` call.
    ///
    /// The latency sample is always recorded; the resize counters are only
    /// touched when `did_resize` is `true`.
    ///
    /// # Arguments
    ///
    /// * `latency` - How long the call took; stored in whole microseconds,
    ///   saturating at `u64::MAX`
    /// * `did_resize` - Whether the call actually resized the storage
    /// * `bytes_resized` - Bytes added by the resize; ignored when
    ///   `did_resize` is `false`
    #[inline]
    pub fn record_ensure_capacity(&self, latency: Duration, did_resize: bool, bytes_resized: u64) {
        // Clamp in `u128` first so an out-of-range duration saturates at
        // `u64::MAX` microseconds instead of wrapping.
        let capped_micros = latency.as_micros().min(u128::from(u64::MAX));
        // The clamp above guarantees the value fits in a `u64`.
        #[allow(clippy::cast_possible_truncation)]
        let sample = capped_micros as u64;

        // PERF-001: every call contributes a latency sample, resize or not.
        self.latency_histogram.record(sample);

        if !did_resize {
            return;
        }
        self.resize_count.fetch_add(1, Ordering::Relaxed);
        self.total_bytes_resized
            .fetch_add(bytes_resized, Ordering::Relaxed);
    }

    /// Returns how many `ensure_capacity` calls have been recorded, as
    /// reported by the histogram's sample count.
    #[must_use]
    pub fn ensure_capacity_count(&self) -> u64 {
        self.latency_histogram.count()
    }

    /// Returns `true` while the latency histogram reports itself empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.latency_histogram.is_empty()
    }

    /// Returns how many recorded calls performed an actual resize.
    #[must_use]
    pub fn resize_count(&self) -> u64 {
        self.resize_count.load(Ordering::Relaxed)
    }

    /// Returns the cumulative number of bytes added by resizes.
    #[must_use]
    pub fn total_bytes_resized(&self) -> u64 {
        self.total_bytes_resized.load(Ordering::Relaxed)
    }

    /// Builds a [`LatencyStats`] snapshot of the `ensure_capacity` latencies.
    ///
    /// Each field comes from a separate histogram query, so samples recorded
    /// concurrently may be reflected in some fields and not in others.
    #[must_use]
    pub fn ensure_capacity_latency_stats(&self) -> LatencyStats {
        let hist = &self.latency_histogram;
        LatencyStats {
            count: hist.count(),
            min_us: hist.min(),
            max_us: hist.max(),
            mean_us: hist.mean(),
            p50_us: hist.percentile(50),
            p95_us: hist.percentile(95),
            p99_us: hist.percentile(99),
        }
    }

    /// Clears both counters and resets the latency histogram.
    ///
    /// The three resets are separate operations; a call recorded concurrently
    /// may be partially retained.
    pub fn reset(&self) {
        self.resize_count.store(0, Ordering::Relaxed);
        self.total_bytes_resized.store(0, Ordering::Relaxed);
        self.latency_histogram.reset();
    }
}
/// Point-in-time summary of recorded latencies.
///
/// Every latency field is expressed in whole microseconds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct LatencyStats {
    /// How many samples the summary covers
    pub count: u64,
    /// Smallest latency, in microseconds
    pub min_us: u64,
    /// Largest latency, in microseconds
    pub max_us: u64,
    /// Average latency, in microseconds
    pub mean_us: u64,
    /// Median (50th percentile), in microseconds
    pub p50_us: u64,
    /// 95th percentile, in microseconds
    pub p95_us: u64,
    /// 99th percentile, in microseconds
    pub p99_us: u64,
}

impl LatencyStats {
    /// Mean latency converted to a [`Duration`].
    #[must_use]
    pub fn mean(&self) -> Duration {
        Duration::from_micros(self.mean_us)
    }

    /// Median (P50) latency converted to a [`Duration`].
    #[must_use]
    pub fn p50(&self) -> Duration {
        Duration::from_micros(self.p50_us)
    }

    /// P95 latency converted to a [`Duration`].
    #[must_use]
    pub fn p95(&self) -> Duration {
        Duration::from_micros(self.p95_us)
    }

    /// P99 latency converted to a [`Duration`].
    #[must_use]
    pub fn p99(&self) -> Duration {
        Duration::from_micros(self.p99_us)
    }

    /// Reports whether the P99 latency is strictly greater than `threshold`.
    ///
    /// # Arguments
    ///
    /// * `threshold` - Largest P99 latency still considered acceptable; a P99
    ///   equal to it does not count as exceeding
    #[must_use]
    pub fn p99_exceeds(&self, threshold: Duration) -> bool {
        threshold < self.p99()
    }
}
/// Scope guard that measures how long it stays alive.
///
/// The clock starts in [`TimingGuard::new`]. When the guard is dropped, the
/// callback is invoked exactly once with the time elapsed since creation.
///
/// Bind the guard to a named variable (e.g. `let _timer = TimingGuard::new(..)`)
/// so it lives for the whole scope being timed. A guard that is not bound is
/// dropped at the end of the statement and reports a near-zero duration, which
/// is why the type is `#[must_use]`.
///
/// The `'a` lifetime is carried only by a `PhantomData` marker; the guard
/// stores no borrowed data of its own.
#[must_use = "an unbound TimingGuard is dropped immediately and records a near-zero duration"]
pub struct TimingGuard<'a, F>
where
    F: FnOnce(Duration),
{
    /// Instant captured when the guard was created.
    start: Instant,
    /// Callback to run on drop. Wrapped in `Option` so `drop`, which only has
    /// `&mut self`, can move the `FnOnce` out and call it.
    callback: Option<F>,
    /// Ties the otherwise unused `'a` parameter to the type.
    _marker: std::marker::PhantomData<&'a ()>,
}

impl<F> TimingGuard<'_, F>
where
    F: FnOnce(Duration),
{
    /// Starts the clock and stores `callback`, which will be called with the
    /// elapsed time when the guard is dropped.
    pub fn new(callback: F) -> Self {
        Self {
            start: Instant::now(),
            callback: Some(callback),
            _marker: std::marker::PhantomData,
        }
    }

    /// Returns the time elapsed since the guard was created.
    ///
    /// This only reads the clock; the callback is not invoked.
    #[must_use]
    pub fn elapsed(&self) -> Duration {
        self.start.elapsed()
    }
}

// Written by hand because the callback type `F` (typically a closure) is not
// required to implement `Debug`; only the start instant and whether the
// callback is still pending are shown.
impl<F> std::fmt::Debug for TimingGuard<'_, F>
where
    F: FnOnce(Duration),
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("TimingGuard")
            .field("start", &self.start)
            .field("armed", &self.callback.is_some())
            .finish()
    }
}

impl<F> Drop for TimingGuard<'_, F>
where
    F: FnOnce(Duration),
{
    /// Invokes the stored callback with the elapsed time.
    fn drop(&mut self) {
        // `take()` moves the `FnOnce` out of `&mut self` so it can be called
        // by value, and leaves `None` behind.
        if let Some(cb) = self.callback.take() {
            cb(self.start.elapsed());
        }
    }
}