lsm_tree/storage_stats.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2026-present, Structured World Foundation
3
4//! Read-only storage introspection: how much is stored, the average shape of a
5//! stored entry, and an estimate of how many more entries fit in a byte budget.
6//!
7//! Computed from the live version's table + blob-file metadata plus one
8//! size-stat per live file (the same accounting `Tree::create_checkpoint`
9//! uses), so it never touches the data blocks. See
10//! [`crate::AbstractTree::storage_stats`].
11
12use crate::version::Version;
13#[cfg(not(feature = "std"))]
14use alloc::vec::Vec;
15
/// Coarse storage state of a tree.
///
/// With storage admission gating off (no configured quota and a backend that
/// cannot report free space) a tree reports [`Self::Healthy`] or, mid-run,
/// [`Self::CompactionInProgress`]. Once gating is active (bounded capacity), an
/// idle tree instead reports compaction availability:
/// [`Self::FullCompactionAvailable`] when a full compaction has working room,
/// [`Self::TightCompactionAvailable`] when only the opt-in tight-space mode
/// would fit, and [`Self::ReadOnlyOutOfSpace`] when the write gate is closed
/// (this takes precedence over a concurrent compaction).
///
/// The enum is `#[non_exhaustive]`: a `match` in another crate needs a
/// wildcard arm, which leaves room to add states without a breaking change.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum StorageStatus {
    /// Normal operation: writes and a full compaction are available.
    ///
    /// Reported by an idle tree while storage admission gating is off.
    Healthy,
    /// Enough free space for a normal (full) compaction.
    ///
    /// Reported by an idle tree once gating is active.
    FullCompactionAvailable,
    /// Not enough space for a full compaction, but the opt-in tight-space
    /// (incremental-reclaim) compaction mode can still run.
    ///
    /// Reported by an idle tree once gating is active.
    TightCompactionAvailable,
    /// Out of space: the tree is read-only until space is freed or the quota
    /// is raised.
    ///
    /// Takes precedence over [`Self::CompactionInProgress`] when the write
    /// gate is closed while a compaction is running.
    ReadOnlyOutOfSpace,
    /// A compaction is currently running.
    CompactionInProgress,
}
42
/// A point-in-time snapshot of a tree's on-disk storage footprint and the
/// average shape of a stored entry.
///
/// All byte figures are on-disk (post-compression, including any per-block
/// overhead and blob files). Averages are over every stored entry version, so
/// they pair with [`Self::item_count`]. All averages use integer division and
/// therefore round down.
#[must_use]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct StorageStats {
    /// Total on-disk bytes of all live SSTs plus blob files: how much is
    /// **occupied**. Pairs with [`Self::capacity_bytes`] / [`Self::available_bytes`]
    /// for an "X of Y used" view in a single call.
    pub used_bytes: u64,

    /// Total bytes the tree may occupy: the tighter of a configured byte quota
    /// (`storage_limit_bytes`) and the physical disk headroom (free space plus
    /// what is already used), across every volume the tree writes to. `None`
    /// when unbounded: no quota set AND the backend cannot report free space.
    pub capacity_bytes: Option<u64>,

    /// Free room left before the tree turns read-only: `capacity_bytes - used_bytes`
    /// (saturating). `None` exactly when [`Self::capacity_bytes`] is `None`
    /// (unbounded).
    pub available_bytes: Option<u64>,

    /// Whether a compaction can still run given the remaining free space (it
    /// needs working room to write merged output). `true` when unbounded or
    /// when at least [`Self::tight_compaction_bytes`] of free space remains;
    /// `false` when the disk is too full for a compaction to make progress. The
    /// finer full-vs-tight distinction is carried by [`Self::status`].
    pub compaction_possible: bool,

    /// Estimated free space (bytes) a FULL compaction needs for its transient
    /// output while the inputs still exist: the largest level's on-disk size
    /// (an upper bound on a single merge's input set). A full compaction has
    /// room when [`Self::available_bytes`] `>=` this. Pair with `used_bytes` /
    /// `capacity_bytes` to draw a capacity gauge: `used` → `used + tight_compaction_bytes`
    /// → `used + full_compaction_bytes` → `capacity`. `0` for an empty tree.
    pub full_compaction_bytes: u64,

    /// Estimated free space (bytes) a minimal (tight) space-reclaiming
    /// compaction needs to make forward progress: the reserved working floor.
    /// Tight compaction has room when [`Self::available_bytes`] `>=` this.
    pub tight_compaction_bytes: u64,

    /// Number of live entries (all versions) across all live SSTs.
    pub item_count: u64,

    /// Number of live SSTs.
    pub table_count: u64,

    /// Average on-disk bytes per entry (`used_bytes / item_count`), or `0` when
    /// the tree is empty. This is the figure
    /// [`Self::estimated_remaining_entries`] divides a budget by.
    ///
    /// Because `used_bytes` includes blob files, so does this average.
    pub avg_entry_on_disk_bytes: u64,

    /// Average user-key byte length per entry, or `None` if any live table was
    /// written before per-table key/value byte sums were recorded (the average
    /// key/value split is only exact when every table carries the figures).
    ///
    /// Also `None` when the tree holds no entries, since there is nothing to
    /// average over.
    pub avg_key_bytes: Option<u64>,

    /// Average value byte length per entry, or `None` under the same condition
    /// as [`Self::avg_key_bytes`].
    ///
    /// Additionally `None` for a KV-separated tree: there the SSTs record an
    /// indirection pointer per large value rather than the user value, so the
    /// per-table value-byte sums would measure pointers.
    pub avg_value_bytes: Option<u64>,

    /// Estimated bytes a full compaction could reclaim, from the
    /// weak-tombstone-reclaimable entry count times the average on-disk entry
    /// size. An estimate, not an exact figure. `0` for an empty tree (the
    /// average entry size is then `0`).
    pub reclaimable_bytes_estimate: u64,

    /// Coarse storage state.
    pub status: StorageStatus,
}
116
/// Approximate size of a key range, estimated from SST block-index offsets and
/// the memtables (active and sealed) WITHOUT reading any data block. Returned by
/// [`crate::AbstractTree::approximate_range_stats`].
///
/// Both figures are estimates from the same in-range fraction per source: each
/// overlapping SST's data-block offsets are interpolated at the range
/// boundaries (block granularity) and that fraction is applied to the SST's
/// byte span and its entry count, while each memtable contributes its in-range
/// skiplist count and the matching share of its size. Accuracy is typically
/// within ~10-15% on roughly-uniform data; it is intended for query planning
/// (split-point selection, cost-based join ordering), not exact accounting.
///
/// The [`Default`] value is the all-zero estimate of an empty range.
#[must_use]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct ApproximateRangeStats {
    /// Estimated on-disk bytes occupied by the range across all overlapping
    /// SSTs (key + pointer + apportioned blob bytes) plus the active and sealed
    /// memtables' in-range share. `0` for an empty range.
    pub bytes: u64,

    /// Estimated number of entry versions in the range: the sum, over each
    /// overlapping SST, of `item_count × in-range fraction`, plus each
    /// memtable's in-range skiplist count. `0` for an empty range.
    pub key_count: u64,
}
141
/// Size and entry count of one stored segment (SST), for per-segment tiering and
/// erasure-coding placement decisions.
///
/// Appears as an element of [`LevelStats::segments`].
#[must_use]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SegmentStats {
    /// Identifier of the segment's SST within its tree.
    pub table_id: crate::TableId,
    /// LSM level the segment lives in (`0` is the newest / smallest level).
    pub level: usize,
    /// Physical on-disk bytes of the segment's SST file, obtained by stat-ing
    /// the file rather than from the size recorded in the table metadata.
    pub used_bytes: u64,
    /// Number of entry versions stored in the segment.
    pub item_count: u64,
    /// Cumulative point reads that consulted this segment's data since it was
    /// created: only reads that pass the segment's seqno-range and bloom gates
    /// count (a bloom miss is not counted), so this tracks data hotness rather
    /// than raw probe frequency. A monotonic counter, not a rate: derive a
    /// read-rate / EMA from the delta between successive polls. `0` when never
    /// read.
    pub reads: u64,
    /// Unix seconds of the segment's most recent data-consulting read, or `0` if
    /// never read (or on a no-std build, which keeps no clock).
    pub last_access_secs: u64,
}
166
/// Per-LSM-level size + entry aggregates with the contributing segments, for
/// tiering and erasure-coding placement (which level / segment is large enough
/// to demote, EC-encode, or migrate).
///
/// Cheap to read: derived from version metadata plus one file-size stat per
/// segment, never a data-block scan. The per-level totals reconcile with the
/// tree-level [`StorageStats`]: summed across levels they equal the SST portion
/// of [`StorageStats::used_bytes`] and [`StorageStats::item_count`] (blob files
/// are tracked separately).
#[must_use]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LevelStats {
    /// LSM level index (`0` is the newest / smallest level).
    pub level: usize,
    /// Number of segments (SSTs) in the level; equals `segments.len()`.
    pub segment_count: usize,
    /// Physical on-disk bytes summed across the level's segments.
    pub used_bytes: u64,
    /// Entry versions summed across the level's segments.
    pub item_count: u64,
    /// Cumulative data-consulting point reads summed (saturating) across the
    /// level's segments. Counted by the same rules as [`SegmentStats::reads`],
    /// so a bloom miss does not contribute.
    pub reads: u64,
    /// Most recent data-consulting read across the level's segments (the
    /// maximum of their [`SegmentStats::last_access_secs`]), in unix seconds,
    /// or `0` if none was ever read.
    pub last_access_secs: u64,
    /// Per-segment breakdown, in level (run / table) order.
    pub segments: Vec<SegmentStats>,
}
195
/// Approximate cardinality and selectivity of a key range, for cost-based query
/// planning (join ordering, scan-vs-seek).
///
/// Both figures derive from the per-data-block zone map (per-block row counts +
/// key ranges) when present, falling back to the byte-fraction estimate of
/// [`ApproximateRangeStats`] otherwise. They are estimates at block granularity,
/// never exact.
///
/// Only `PartialEq` is derived (not `Eq`) because [`Self::selectivity`] is a
/// floating-point value. The [`Default`] value is the all-zero estimate of an
/// empty range.
#[must_use]
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct RangeCardinality {
    /// Estimated number of rows (entry versions) the range covers: the sum of
    /// the per-block row counts of every data block whose key range overlaps the
    /// query range, plus each memtable's in-range count. `0` for an empty range.
    pub rows: u64,

    /// Estimated fraction of the tree's rows the range selects, in `0.0..=1.0`:
    /// `rows / total_rows`. Monotonic in predicate tightness (a narrower range
    /// never yields a larger selectivity). `0.0` when the tree is empty.
    pub selectivity: f64,
}
216
/// Grouped, object-safe read-only storage-statistics surface.
///
/// A coherent view over a tree's non-query statistics: on-disk footprint
/// ([`storage_stats`](Self::storage_stats)), per-level / per-segment sizing
/// ([`level_segment_stats`](Self::level_segment_stats)), compaction debt
/// ([`compaction_debt`](Self::compaction_debt)), and block-cache health
/// (`cache_stats`, behind the `metrics` feature). A planner / tiering / capacity consumer
/// bounds on `T: StorageStatistics` (or `&dyn StorageStatistics`) and a test can
/// supply a mock. Every [`AbstractTree`](crate::AbstractTree) implements it via a
/// blanket impl (`impl<T: AbstractTree + ?Sized> StorageStatistics for T`).
///
/// The per-query range estimators
/// ([`approximate_range_stats`](crate::AbstractTree::approximate_range_stats),
/// [`approximate_range_cardinality`](crate::AbstractTree::approximate_range_cardinality))
/// are generic over the range type and so not object-safe; they stay on
/// [`AbstractTree`](crate::AbstractTree) rather than joining this trait.
pub trait StorageStatistics {
    /// On-disk footprint and average entry shape: used / capacity / available
    /// bytes, item & table counts, average entry size, reclaimable-bytes
    /// estimate, and a coarse [`StorageStatus`]. See
    /// [`StorageStats::estimated_remaining_entries`] for a budget projection.
    ///
    /// # Examples
    ///
    /// ```
    /// # use lsm_tree::Error as TreeError;
    /// use lsm_tree::{AbstractTree, Config, StorageStatistics};
    ///
    /// let folder = tempfile::tempdir()?;
    /// let tree = Config::new(&folder, Default::default(), Default::default()).open()?;
    /// for i in 0..100u32 {
    ///     tree.insert(format!("k{i:04}"), "v", 0);
    /// }
    /// tree.flush_active_memtable(0)?;
    ///
    /// // Both traits are in scope, so disambiguate the shared method name.
    /// let stats = StorageStatistics::storage_stats(&tree)?;
    /// assert_eq!(stats.item_count, 100);
    /// // Roughly how many more average-shaped entries fit in another 1 MiB.
    /// let _headroom = stats.estimated_remaining_entries(1024 * 1024);
    /// #
    /// # Ok::<(), TreeError>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error if a live file's size cannot be stat-ed.
    fn storage_stats(&self) -> crate::Result<StorageStats>;

    /// Per-LSM-level and per-segment size + entry-count stats, for tiering and
    /// erasure-coding placement decisions (which level / segment is large enough
    /// to demote, EC-encode, or migrate).
    ///
    /// Cheap: derived from the live version's metadata plus one file-size stat
    /// per segment (no data-block scan). The per-level totals reconcile with
    /// [`storage_stats`](Self::storage_stats): summed across levels they equal
    /// the SST portion of [`StorageStats::used_bytes`] and
    /// [`StorageStats::item_count`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use lsm_tree::Error as TreeError;
    /// use lsm_tree::{AbstractTree, Config, StorageStatistics};
    ///
    /// let folder = tempfile::tempdir()?;
    /// let tree = Config::new(&folder, Default::default(), Default::default()).open()?;
    /// for i in 0..100u32 {
    ///     tree.insert(format!("k{i:04}"), "v", 0);
    /// }
    /// tree.flush_active_memtable(0)?;
    ///
    /// // Both traits are in scope, so disambiguate the shared method names.
    /// let levels = StorageStatistics::level_segment_stats(&tree)?;
    /// let total: u64 = levels.iter().map(|l| l.item_count).sum();
    /// assert_eq!(total, StorageStatistics::storage_stats(&tree)?.item_count);
    /// #
    /// # Ok::<(), TreeError>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error if a segment's file size cannot be stat-ed.
    fn level_segment_stats(&self) -> crate::Result<Vec<LevelStats>>;

    /// Estimated bytes pending compaction under `strategy`: on-disk data above
    /// its level's target that must eventually be rewritten downward (a `RocksDB`
    /// `estimate-pending-compaction-bytes` analog), a compaction-debt signal for a
    /// scheduler / tiering consumer.
    ///
    /// The strategy is a caller argument because the engine does not own a
    /// configured compaction strategy (it is injected per compaction run); a
    /// `&dyn` keeps this object-safe. Returns `0` for strategies without a
    /// size-target notion of debt (FIFO, drop-range), or when the tree is at or
    /// below its target shape. See
    /// [`CompactionStrategy::pending_compaction_bytes`](crate::compaction::CompactionStrategy::pending_compaction_bytes).
    ///
    /// Unlike the other statistics on this trait, this one is infallible: it
    /// returns a plain `u64` rather than a `Result`.
    fn compaction_debt(&self, strategy: &dyn crate::compaction::CompactionStrategy) -> u64;

    /// A point-in-time [`CacheStats`](crate::CacheStats) snapshot of block-cache
    /// effectiveness (cumulative hit / miss counts and rate) and occupancy
    /// (current size against capacity).
    ///
    /// The stable, owned observability view over the block cache, so a consumer
    /// reads cache health without holding the mutable
    /// [`metrics`](crate::AbstractTree::metrics) handle. Counts are cumulative
    /// since process start; derive a rate over an interval from the delta between
    /// two polls.
    ///
    /// Only present when the crate is built with the `metrics` feature.
    #[cfg(feature = "metrics")]
    fn cache_stats(&self) -> crate::CacheStats;
}
327
328/// Every [`AbstractTree`](crate::AbstractTree) is a [`StorageStatistics`] by
329/// delegating to its own inherent stats methods, so a `Tree` / `BlobTree` can be
330/// used directly as `&dyn StorageStatistics`. The logic lives once on
331/// `AbstractTree`; this is a thin object-safe re-exposure for the grouped /
332/// mockable surface (a test mock implements `StorageStatistics` directly without
333/// being an `AbstractTree`). When both traits are in scope, disambiguate a bare
334/// `tree.storage_stats()` with `StorageStatistics::storage_stats(&tree)`.
335impl<T: crate::AbstractTree + ?Sized> StorageStatistics for T {
336 fn storage_stats(&self) -> crate::Result<StorageStats> {
337 crate::AbstractTree::storage_stats(self)
338 }
339
340 fn level_segment_stats(&self) -> crate::Result<Vec<LevelStats>> {
341 crate::AbstractTree::level_segment_stats(self)
342 }
343
344 fn compaction_debt(&self, strategy: &dyn crate::compaction::CompactionStrategy) -> u64 {
345 crate::AbstractTree::compaction_debt(self, strategy)
346 }
347
348 #[cfg(feature = "metrics")]
349 fn cache_stats(&self) -> crate::CacheStats {
350 crate::AbstractTree::cache_stats(self)
351 }
352}
353
354impl StorageStats {
355 /// Approximately how many more average-shaped entries fit in `budget_bytes`,
356 /// using [`Self::avg_entry_on_disk_bytes`].
357 ///
358 /// Returns `0` when the average entry size is unknown (an empty tree), since
359 /// there is no basis for the estimate.
360 #[must_use]
361 pub fn estimated_remaining_entries(&self, budget_bytes: u64) -> u64 {
362 if self.avg_entry_on_disk_bytes == 0 {
363 0
364 } else {
365 budget_bytes / self.avg_entry_on_disk_bytes
366 }
367 }
368}
369
370/// Sums the true physical on-disk size of every live table and blob file in
371/// `version` (one metadata stat per file).
372///
373/// This is the same physical basis [`compute_storage_stats`] reports as
374/// `used_bytes` and that `Tree::create_checkpoint` totals, so the storage
375/// admission gate agrees with both. It deliberately does NOT use
376/// `Metadata::file_size` (undercounts by the meta block / footer) or
377/// `disk_space()` (metadata `Level::size`, which also omits blob files).
378///
379/// # Errors
380///
381/// Returns an error if a live table or blob file's size cannot be stat-ed.
382pub(crate) fn compute_used_bytes(version: &Version) -> crate::Result<u64> {
383 // Sum of on-disk file sizes, bounded by the filesystem capacity → cannot
384 // overflow u64; plain arithmetic.
385 let mut used_bytes = 0u64;
386 for table in version.iter_tables() {
387 used_bytes += table.fs.metadata(&table.path)?.len;
388 }
389 for blob in version.blob_files.iter() {
390 used_bytes += blob.0.fs.metadata(&blob.0.path)?.len;
391 }
392 Ok(used_bytes)
393}
394
395/// The transient-output bound a full compaction's space check uses: the largest
396/// level's on-disk size (the `full_compaction_bytes` gauge figure), an upper
397/// bound on a single merge's input set. `0` for an empty tree.
398///
399/// This is the DEMAND. The destination VOLUME is a separate concern: a full
400/// compaction writes its output to the last configured level
401/// (`level_count - 1`), not to whichever level is currently largest, so callers
402/// pass the last level as the destination to the per-volume space check (the two
403/// differ only under tiered routing, where they can be different filesystems).
404pub(crate) fn full_compaction_demand_bytes(version: &Version) -> u64 {
405 version
406 .iter_levels()
407 .map(crate::version::Level::size)
408 .max()
409 .unwrap_or(0)
410}
411
412/// Computes [`StorageStats`] from a live version's table + blob-file metadata.
413///
414/// `is_compacting` selects [`StorageStatus::CompactionInProgress`] vs
415/// [`StorageStatus::Healthy`]; the caller supplies it because compaction state
416/// is engine-internal.
417///
418/// `value_bytes_are_user_values` must be `false` for a KV-separated
419/// (`BlobTree`) tree: there the SST records a small indirection pointer per
420/// large value, not the user value, so the per-table value-byte sum measures
421/// pointers and the value average would misreport. When `false`,
422/// [`StorageStats::avg_value_bytes`] is forced to `None`. Key bytes are never
423/// separated, so [`StorageStats::avg_key_bytes`] stays exact either way.
424///
425/// `used_bytes` is the true on-disk file size of every live table and blob
426/// file (one metadata stat per file), not the writer's `Metadata::file_size`
427/// or `crate::version::Version::blob_files`' compressed-payload sum: those
428/// undercount the physical file by the meta block / footer / blob trailer.
429/// Statting matches the figure `Tree::create_checkpoint` reports, so the two
430/// agree on disk reality.
431///
432/// # Errors
433///
434/// Returns an error if a live table or blob file's size cannot be stat-ed.
435pub(crate) fn compute_storage_stats(
436 version: &Version,
437 is_compacting: bool,
438 value_bytes_are_user_values: bool,
439) -> crate::Result<StorageStats> {
440 let mut used_bytes = 0u64;
441 let mut item_count = 0u64;
442 let mut table_count = 0u64;
443 let mut reclaimable_entries = 0u64;
444 let mut sum_key = 0u64;
445 let mut sum_value = 0u64;
446 // The key/value split is only exact when EVERY live table records the byte
447 // sums; a single legacy table without them makes the average unrepresentable.
448 let mut all_have_shape = true;
449
450 // Every running total below is a sum of on-disk byte sizes or live item
451 // counts; both are bounded by the filesystem capacity / the live entry count
452 // and cannot overflow u64, so plain arithmetic is correct (a debug-overflow
453 // would itself signal a corrupt metadata read).
454 for table in version.iter_tables() {
455 let m = &table.metadata;
456 // Physical file size, NOT m.file_size (which undercounts — see above).
457 let on_disk = table.fs.metadata(&table.path)?.len;
458 used_bytes += on_disk;
459 item_count += m.item_count;
460 table_count += 1;
461 reclaimable_entries += m.weak_tombstone_reclaimable;
462 match (m.sum_user_key_bytes, m.sum_value_bytes) {
463 (Some(k), Some(v)) => {
464 sum_key += k;
465 sum_value += v;
466 }
467 _ => all_have_shape = false,
468 }
469 }
470
471 // Physical blob-file size (metadata + trailer included), NOT
472 // BlobFileList::on_disk_size() which sums only the compressed payload.
473 for blob in version.blob_files.iter() {
474 used_bytes += blob.0.fs.metadata(&blob.0.path)?.len;
475 }
476
477 let avg_entry_on_disk_bytes = if item_count == 0 {
478 0
479 } else {
480 used_bytes / item_count
481 };
482
483 let have_shape = all_have_shape && item_count > 0;
484 let avg_key_bytes = have_shape.then(|| sum_key / item_count);
485 // Value bytes are only meaningful when not KV-separated (see param doc).
486 let avg_value_bytes =
487 (have_shape && value_bytes_are_user_values).then(|| sum_value / item_count);
488
489 // reclaimable_entries ≤ item_count and avg_entry_on_disk_bytes = used / item_count,
490 // so the product is ≤ used_bytes (bounded by disk capacity): plain multiply.
491 let reclaimable_bytes_estimate = reclaimable_entries * avg_entry_on_disk_bytes;
492
493 // A full compaction's transient output is bounded by its input set; the
494 // largest single merge is bounded by the largest level's on-disk size, so
495 // that is the free space a full compaction needs.
496 let full_compaction_bytes = full_compaction_demand_bytes(version);
497 // A minimal (tight) space-reclaiming merge needs only the reserved working
498 // floor to make forward progress.
499 let tight_compaction_bytes = crate::tree::MIN_RESERVED_HEADROOM;
500
501 let status = if is_compacting {
502 StorageStatus::CompactionInProgress
503 } else {
504 StorageStatus::Healthy
505 };
506
507 Ok(StorageStats {
508 used_bytes,
509 // Capacity is disk-aware (quota + free-space probe) and lives at the
510 // tree layer; this version-only computation leaves it unbounded. The
511 // caller (`Tree::storage_stats`) fills the real figures.
512 capacity_bytes: None,
513 available_bytes: None,
514 compaction_possible: true,
515 full_compaction_bytes,
516 tight_compaction_bytes,
517 item_count,
518 table_count,
519 avg_entry_on_disk_bytes,
520 avg_key_bytes,
521 avg_value_bytes,
522 reclaimable_bytes_estimate,
523 status,
524 })
525}
526
527/// Computes per-LSM-level and per-segment size + entry stats from a version.
528///
529/// Cost is O(levels x segments) plus one file-size stat per segment (the same
530/// stat [`compute_storage_stats`] already performs); it never reads a data block.
531///
532/// # Errors
533///
534/// Returns an error if a segment's file size cannot be stat-ed.
535pub(crate) fn compute_level_segment_stats(version: &Version) -> crate::Result<Vec<LevelStats>> {
536 use core::sync::atomic::Ordering::Relaxed;
537 let mut levels = Vec::with_capacity(version.level_count());
538 for (level, run_group) in version.iter_levels().enumerate() {
539 let mut segments = Vec::new();
540 let mut used_bytes = 0u64;
541 let mut item_count = 0u64;
542 let mut reads = 0u64;
543 let mut last_access_secs = 0u64;
544 for run in run_group.iter() {
545 for table in run.iter() {
546 // Physical file size, NOT m.file_size (which undercounts), to
547 // reconcile with the tree-level `used_bytes`.
548 let on_disk = table.fs.metadata(&table.path)?.len;
549 let items = table.metadata.item_count;
550 let seg_reads = table.read_count.load(Relaxed);
551 let seg_access = table.last_access_secs.load(Relaxed);
552 used_bytes += on_disk;
553 item_count += items;
554 reads = reads.saturating_add(seg_reads);
555 last_access_secs = last_access_secs.max(seg_access);
556 segments.push(SegmentStats {
557 table_id: table.metadata.id,
558 level,
559 used_bytes: on_disk,
560 item_count: items,
561 reads: seg_reads,
562 last_access_secs: seg_access,
563 });
564 }
565 }
566 levels.push(LevelStats {
567 level,
568 segment_count: segments.len(),
569 used_bytes,
570 item_count,
571 reads,
572 last_access_secs,
573 segments,
574 });
575 }
576 Ok(levels)
577}
578
// Unit tests live in the out-of-line `tests` submodule, compiled only for test
// builds.
#[cfg(test)]
mod tests;