1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
use std::sync::atomic::{AtomicU64, Ordering};
/// Snapshot of live database, storage, maintenance, cache, and read-path stats.
///
/// Every field is an owned value (integers, flags, a `Vec`, and nested `Copy`
/// stat structs), so a `DbStats` never borrows from the state it describes.
/// The derived `Default` is the all-zero snapshot: zero counters, `false`
/// flags, and an empty `level_tables` list.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DbStats {
/// Number of bucket states currently loaded.
pub live_buckets: usize,
/// Number of pinned snapshots.
pub active_snapshots: usize,
/// Approximate bytes held by active memtables.
pub memtable_bytes: u64,
/// Number of immutable memtables waiting for flush.
pub immutable_memtables: usize,
/// Number of level-0 table files.
pub l0_tables: usize,
/// Total number of table files across all levels.
pub total_tables: usize,
/// Per-level table counts and byte sizes, one [`LevelStats`] entry per reported level.
pub level_tables: Vec<LevelStats>,
/// Total bytes held by table files.
pub table_bytes: u64,
/// WAL bytes accepted but not yet synced according to the selected durability.
pub wal_bytes_pending_sync: u64,
/// Number of blob files referenced by live records.
pub live_blob_files: usize,
/// Bytes in blob files referenced by live records.
pub live_blob_bytes: u64,
/// Number of blob files with discardable bytes.
pub stale_blob_files: usize,
/// Discardable bytes in stale blob files.
pub stale_blob_bytes: u64,
/// Number of blob files no longer referenced by live records.
pub obsolete_blob_files: usize,
/// Bytes in obsolete blob files.
pub obsolete_blob_bytes: u64,
/// Number of blob garbage-collection runs.
pub blob_gc_runs: u64,
/// Input bytes scanned by blob garbage collection.
pub blob_gc_input_bytes: u64,
/// Output bytes written by blob garbage collection.
pub blob_gc_output_bytes: u64,
/// Bytes discarded by blob garbage collection.
pub blob_gc_discarded_bytes: u64,
/// Number of blob value reads.
pub blob_read_count: u64,
/// Bytes returned by blob value reads.
pub blob_read_bytes: u64,
/// Number of compaction runs.
pub compaction_runs: u64,
/// Table files read by compaction.
pub compaction_input_tables: u64,
/// Table files written by compaction.
pub compaction_output_tables: u64,
/// Table bytes read by compaction.
pub compaction_input_bytes: u64,
/// Table bytes written by compaction.
pub compaction_output_bytes: u64,
/// Commit sequences allocated by writers.
pub commit_sequences_allocated: u64,
/// Highest commit sequence visible to readers.
pub commit_visible_sequence: u64,
/// Commit slots allocated but not yet published or skipped.
pub commit_open_slots: usize,
/// Commit slots skipped after failed publication.
pub commit_skipped_slots: u64,
/// Number of WAL shards configured for the database.
pub wal_shards: usize,
/// Number of WAL shards currently open.
pub wal_open_shards: usize,
/// Per-shard WAL queue capacity.
pub wal_queue_capacity: usize,
/// WAL records accepted by the writer path.
pub wal_records_accepted: u64,
/// WAL bytes accepted by the writer path.
pub wal_bytes_accepted: u64,
/// Whether storage work is using the runtime's sync adapter.
pub storage_uses_sync_adapter: bool,
/// Whether storage work is using platform async I/O.
pub storage_uses_platform_async_io: bool,
/// Blocking storage tasks accepted by the sync adapter.
pub storage_sync_adapter_tasks: u64,
/// Sync-adapter queue capacity.
pub storage_sync_adapter_queue_capacity: usize,
/// Sync-adapter tasks currently queued.
pub storage_sync_adapter_queued_tasks: usize,
/// Sync-adapter tasks submitted.
pub storage_sync_adapter_submitted_tasks: u64,
/// Sync-adapter tasks completed.
pub storage_sync_adapter_completed_tasks: u64,
/// Sync-adapter tasks rejected because the queue was full or unavailable.
pub storage_sync_adapter_rejected_tasks: u64,
/// Total runtime spent by sync-adapter tasks, in microseconds.
pub storage_sync_adapter_total_runtime_micros: u64,
/// Storage tasks completed through platform async I/O.
pub storage_platform_async_io_tasks: u64,
/// Storage tasks that used a backend fallback path.
pub storage_platform_backend_fallback_tasks: u64,
/// Storage tasks that used a synchronous fallback path.
pub storage_platform_sync_fallback_tasks: u64,
/// Storage tasks completed inline.
pub storage_inline_tasks: u64,
/// Per-operation storage request counters and latency totals.
pub storage_operations: StorageOperationStats,
/// Cooperative maintenance yields.
pub maintenance_cooperative_yields: u64,
/// Maintenance runs stopped after exhausting their budget.
pub maintenance_budget_exhaustions: u64,
/// Block-cache hits.
pub block_cache_hits: u64,
/// Block-cache misses.
pub block_cache_misses: u64,
/// Point-read and prefix-scan path counters.
pub read_path: ReadPathStats,
/// Filter hit, miss, and false-positive counters.
pub filters: FilterStats,
}
/// Request count and total latency for one storage operation.
///
/// Only running totals are stored. A mean latency is not kept here; it can be
/// derived as `total_latency_micros / requests` when `requests` is non-zero.
/// The derived `Default` is zero for both fields.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct StorageOperationMetric {
/// Number of operation requests.
pub requests: u64,
/// Total operation latency in microseconds, summed over all requests.
pub total_latency_micros: u64,
}
/// Per-operation storage metrics.
///
/// Holds one [`StorageOperationMetric`] (request count plus total latency) per
/// kind of storage operation. The struct is `Copy`, and its derived `Default`
/// zeroes every metric.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct StorageOperationStats {
/// Opens of readable storage objects.
pub open_read: StorageOperationMetric,
/// Length queries for readable storage objects.
pub len: StorageOperationMetric,
/// Borrowed-buffer positioned reads.
pub read_exact_at: StorageOperationMetric,
/// Owned-buffer positioned reads.
pub read_exact_at_owned: StorageOperationMetric,
/// Whole-object byte reads.
pub read_object_bytes: StorageOperationMetric,
/// Opens of appendable storage objects.
pub open_append: StorageOperationMetric,
/// Appends to storage objects.
pub append: StorageOperationMetric,
/// Persistence requests for storage objects.
pub persist: StorageOperationMetric,
/// WAL rewrite operations.
pub rewrite_wal: StorageOperationMetric,
/// Writer-lease acquisition requests.
pub acquire_writer_lease: StorageOperationMetric,
/// Directory creation requests.
pub create_directory_all: StorageOperationMetric,
/// Directory file-list requests.
pub list_directory_files: StorageOperationMetric,
/// Directory sync requests after atomic renames.
pub sync_directory_after_renames: StorageOperationMetric,
/// Reads of the current manifest pointer.
pub read_current_manifest: StorageOperationMetric,
/// Manifest publish operations.
pub publish_manifest: StorageOperationMetric,
/// Whole-object writes.
pub write_object: StorageOperationMetric,
/// Object delete requests.
pub delete_object: StorageOperationMetric,
/// Object list requests.
pub list_objects: StorageOperationMetric,
}
/// Lock-free running totals for blob value reads.
///
/// The two counters are independent statistics: they are only ever bumped by
/// [`BlobReadMetrics::record`] and read by [`BlobReadMetrics::snapshot`], and
/// they do not guard or publish any other memory. All accesses therefore use
/// `Ordering::Relaxed`.
#[derive(Debug, Default)]
pub(crate) struct BlobReadMetrics {
    /// Number of reads recorded so far.
    count: AtomicU64,
    /// Sum of the byte counts passed to `record`.
    bytes: AtomicU64,
}

impl BlobReadMetrics {
    /// Records one blob read that returned `bytes` bytes.
    ///
    /// Uses `fetch_add`, so each counter wraps around on `u64` overflow
    /// instead of saturating.
    pub(crate) fn record(&self, bytes: u64) {
        self.count.fetch_add(1, Ordering::Relaxed);
        self.bytes.fetch_add(bytes, Ordering::Relaxed);
    }

    /// Returns the current `(read count, total bytes)` pair.
    ///
    /// The two values are loaded separately, so a snapshot taken while another
    /// thread is inside `record` may see one counter updated and not the
    /// other. Relaxed loads match the relaxed increments in `record`: an
    /// acquire load only synchronizes with a release store, and `record`
    /// performs none, so a stronger ordering here would add cost without
    /// adding any guarantee.
    pub(crate) fn snapshot(&self) -> (u64, u64) {
        (
            self.count.load(Ordering::Relaxed),
            self.bytes.load(Ordering::Relaxed),
        )
    }
}
/// Table count and byte size for one LSM level.
///
/// The derived `Default` describes level 0 with no tables and zero bytes.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct LevelStats {
/// LSM level number.
pub level: u32,
/// Number of table files in the level.
pub tables: usize,
/// Total bytes in the level's table files.
pub bytes: u64,
}
/// Filter counters for table-level and block-level filters.
///
/// Counters come in four groups of three (hits, misses, false positives), one
/// group per combination of filter granularity (table or block) and read kind
/// (point or prefix). Two values can be merged field by field with
/// `saturating_add_assign`.
///
/// NOTE(review): a false positive is by definition also a positive filter
/// result, so it is presumably counted in the matching `*_hits` field as
/// well; confirm against the code that records these counters.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FilterStats {
/// Table key-filter positive results for point reads.
pub table_point_hits: u64,
/// Table key-filter negative results for point reads.
pub table_point_misses: u64,
/// Table key-filter positives that did not contain the key.
pub table_point_false_positives: u64,
/// Table prefix-filter positive results for prefix reads.
pub table_prefix_hits: u64,
/// Table prefix-filter negative results for prefix reads.
pub table_prefix_misses: u64,
/// Table prefix-filter positives that did not contain the prefix.
pub table_prefix_false_positives: u64,
/// Block key-filter positive results for point reads.
pub block_point_hits: u64,
/// Block key-filter negative results for point reads.
pub block_point_misses: u64,
/// Block key-filter positives that did not contain the key.
pub block_point_false_positives: u64,
/// Block prefix-filter positive results for prefix reads.
pub block_prefix_hits: u64,
/// Block prefix-filter negative results for prefix reads.
pub block_prefix_misses: u64,
/// Block prefix-filter positives that did not contain the prefix.
pub block_prefix_false_positives: u64,
}
/// Counters recording how far point reads and prefix scans travel through
/// table metadata and data blocks.
///
/// The struct is `Copy`; its derived `Default` zeroes every counter.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ReadPathStats {
    /// Table files considered by point reads.
    pub point_table_probes: u64,
    /// Index partitions considered by point reads.
    pub point_index_partition_probes: u64,
    /// Data-block metadata entries considered by point reads.
    pub point_block_metadata_probes: u64,
    /// Data blocks read by point reads.
    pub point_data_block_reads: u64,
    /// Point reads skipped because filters ruled out a table or block.
    pub point_filter_misses: u64,
    /// Table files considered by prefix scans.
    pub prefix_table_probes: u64,
    /// Data-block metadata entries considered by prefix scans.
    pub prefix_block_metadata_probes: u64,
    /// Data blocks read by prefix scans.
    pub prefix_data_block_reads: u64,
    /// Prefix scan work skipped because filters ruled out a table or block.
    pub prefix_filter_misses: u64,
}

impl ReadPathStats {
    /// Adds each counter of `other` onto the matching counter of `self`.
    ///
    /// Every sum is clamped at `u64::MAX` rather than wrapping.
    pub(crate) fn saturating_add_assign(&mut self, other: Self) {
        // Destructure without `..` so that a newly added field fails to
        // compile here until it is merged as well.
        let ReadPathStats {
            point_table_probes,
            point_index_partition_probes,
            point_block_metadata_probes,
            point_data_block_reads,
            point_filter_misses,
            prefix_table_probes,
            prefix_block_metadata_probes,
            prefix_data_block_reads,
            prefix_filter_misses,
        } = other;

        // Pair each destination slot with the amount to add to it.
        let merges = [
            (&mut self.point_table_probes, point_table_probes),
            (
                &mut self.point_index_partition_probes,
                point_index_partition_probes,
            ),
            (
                &mut self.point_block_metadata_probes,
                point_block_metadata_probes,
            ),
            (&mut self.point_data_block_reads, point_data_block_reads),
            (&mut self.point_filter_misses, point_filter_misses),
            (&mut self.prefix_table_probes, prefix_table_probes),
            (
                &mut self.prefix_block_metadata_probes,
                prefix_block_metadata_probes,
            ),
            (&mut self.prefix_data_block_reads, prefix_data_block_reads),
            (&mut self.prefix_filter_misses, prefix_filter_misses),
        ];

        for (total, delta) in merges {
            *total = total.saturating_add(delta);
        }
    }
}
impl FilterStats {
    /// Adds each counter of `other` onto the matching counter of `self`.
    ///
    /// Every sum is clamped at `u64::MAX` rather than wrapping.
    pub(crate) fn saturating_add_assign(&mut self, other: Self) {
        // Adds `delta` into `slot`, clamping at `u64::MAX`.
        fn bump(slot: &mut u64, delta: u64) {
            *slot = slot.saturating_add(delta);
        }

        // Table-level filters, point reads.
        bump(&mut self.table_point_hits, other.table_point_hits);
        bump(&mut self.table_point_misses, other.table_point_misses);
        bump(
            &mut self.table_point_false_positives,
            other.table_point_false_positives,
        );

        // Table-level filters, prefix reads.
        bump(&mut self.table_prefix_hits, other.table_prefix_hits);
        bump(&mut self.table_prefix_misses, other.table_prefix_misses);
        bump(
            &mut self.table_prefix_false_positives,
            other.table_prefix_false_positives,
        );

        // Block-level filters, point reads.
        bump(&mut self.block_point_hits, other.block_point_hits);
        bump(&mut self.block_point_misses, other.block_point_misses);
        bump(
            &mut self.block_point_false_positives,
            other.block_point_false_positives,
        );

        // Block-level filters, prefix reads.
        bump(&mut self.block_prefix_hits, other.block_prefix_hits);
        bump(&mut self.block_prefix_misses, other.block_prefix_misses);
        bump(
            &mut self.block_prefix_false_positives,
            other.block_prefix_false_positives,
        );
    }
}