Skip to main content

lsm_tree/
storage_stats.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2026-present, Structured World Foundation
3
4//! Read-only storage introspection: how much is stored, the average shape of a
5//! stored entry, and an estimate of how many more entries fit in a byte budget.
6//!
7//! Computed from the live version's table + blob-file metadata plus one
8//! size-stat per live file (the same accounting `Tree::create_checkpoint`
9//! uses), so it never touches the data blocks. See
10//! [`crate::AbstractTree::storage_stats`].
11
12use crate::version::Version;
13#[cfg(not(feature = "std"))]
14use alloc::vec::Vec;
15
/// Coarse storage state of a tree.
///
/// With storage admission gating off (no configured quota and a backend that
/// cannot report free space) a tree reports [`Self::Healthy`] or, mid-run,
/// [`Self::CompactionInProgress`]. Once gating is active (bounded capacity), an
/// idle tree instead reports compaction availability:
/// [`Self::FullCompactionAvailable`] when a full compaction has working room,
/// [`Self::TightCompactionAvailable`] when only the opt-in tight-space mode
/// would fit, and [`Self::ReadOnlyOutOfSpace`] when the write gate is closed
/// (this takes precedence over a concurrent compaction).
///
/// The version-only computation in this module (`compute_storage_stats`) has
/// no capacity information and therefore only ever produces
/// [`Self::Healthy`] or [`Self::CompactionInProgress`]; the three
/// space-dependent variants are never set there.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
// Non-exhaustive so further states can be added later without breaking
// downstream `match` expressions (which must carry a wildcard arm).
#[non_exhaustive]
pub enum StorageStatus {
    /// Normal operation: writes and a full compaction are available.
    Healthy,
    /// Enough free space for a normal (full) compaction.
    FullCompactionAvailable,
    /// Not enough space for a full compaction, but the opt-in tight-space
    /// (incremental-reclaim) compaction mode can still run.
    TightCompactionAvailable,
    /// Out of space: the tree is read-only until space is freed or the quota
    /// is raised.
    ReadOnlyOutOfSpace,
    /// A compaction is currently running.
    CompactionInProgress,
}
42
/// A point-in-time snapshot of a tree's on-disk storage footprint and the
/// average shape of a stored entry.
///
/// All byte figures are on-disk (post-compression, including any per-block
/// overhead and blob files). Averages are over every stored entry version, so
/// they pair with [`Self::item_count`].
///
/// The version-only constructor in this module (`compute_storage_stats`) has
/// no view of disk capacity: it leaves [`Self::capacity_bytes`] and
/// [`Self::available_bytes`] as `None` and [`Self::compaction_possible`] as
/// `true`, and its comments state that the tree layer fills in the real
/// figures.
#[must_use]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct StorageStats {
    /// Total on-disk bytes of all live SSTs plus blob files: how much is
    /// **occupied**. Pairs with [`Self::capacity_bytes`] / [`Self::available_bytes`]
    /// for an "X of Y used" view in a single call.
    pub used_bytes: u64,

    /// Total bytes the tree may occupy: the tighter of a configured byte quota
    /// (`storage_limit_bytes`) and the physical disk headroom (free space plus
    /// what is already used), across every volume the tree writes to. `None`
    /// when unbounded: no quota set AND the backend cannot report free space.
    pub capacity_bytes: Option<u64>,

    /// Free room left before the tree turns read-only: `capacity_bytes - used_bytes`
    /// (saturating). `None` exactly when [`Self::capacity_bytes`] is `None`
    /// (unbounded).
    pub available_bytes: Option<u64>,

    /// Whether a compaction can still run given the remaining free space (it
    /// needs working room to write merged output). `true` when unbounded or
    /// when at least [`Self::tight_compaction_bytes`] of free space remains;
    /// `false` when the disk is too full for a compaction to make progress. The
    /// finer full-vs-tight distinction is carried by [`Self::status`].
    pub compaction_possible: bool,

    /// Estimated free space (bytes) a FULL compaction needs for its transient
    /// output while the inputs still exist: the largest level's on-disk size
    /// (an upper bound on a single merge's input set). A full compaction has
    /// room when [`Self::available_bytes`] `>=` this. Pair with `used_bytes` /
    /// `capacity_bytes` to draw a capacity gauge: `used` → `used + tight_compaction_bytes`
    /// → `used + full_compaction_bytes` → `capacity`.
    pub full_compaction_bytes: u64,

    /// Estimated free space (bytes) a minimal (tight) space-reclaiming
    /// compaction needs to make forward progress: the reserved working floor.
    /// Tight compaction has room when [`Self::available_bytes`] `>=` this.
    pub tight_compaction_bytes: u64,

    /// Number of live entries (all versions) across all live SSTs.
    pub item_count: u64,

    /// Number of live SSTs.
    pub table_count: u64,

    /// Average on-disk bytes per entry (`used_bytes / item_count`, integer
    /// division, so rounded down), or `0` when the tree is empty. This is the
    /// figure [`Self::estimated_remaining_entries`] divides a budget by.
    pub avg_entry_on_disk_bytes: u64,

    /// Average user-key byte length per entry (rounded down). `None` when the
    /// tree is empty, or if any live table was written before per-table
    /// key/value byte sums were recorded (the average key/value split is only
    /// exact when every table carries the figures).
    pub avg_key_bytes: Option<u64>,

    /// Average value byte length per entry (rounded down). `None` under the
    /// same conditions as [`Self::avg_key_bytes`], and additionally for a
    /// KV-separated tree, where the per-table value sums measure indirection
    /// pointers rather than user values.
    pub avg_value_bytes: Option<u64>,

    /// Estimated bytes a full compaction could reclaim, from the
    /// weak-tombstone-reclaimable entry count times the average on-disk entry
    /// size. An estimate, not an exact figure.
    pub reclaimable_bytes_estimate: u64,

    /// Coarse storage state.
    pub status: StorageStatus,
}
116
/// Approximate size of a key range, estimated from SST block-index offsets and
/// the active memtable WITHOUT reading any data block. Returned by
/// [`crate::AbstractTree::approximate_range_stats`].
///
/// Both figures are estimates from the same in-range fraction per source: each
/// overlapping SST's data-block offsets are interpolated at the range
/// boundaries (block granularity) and that fraction is applied to the SST's
/// byte span and its entry count, while each memtable contributes its in-range
/// skiplist count and the matching share of its size. Accuracy is typically
/// within ~10-15% on roughly-uniform data; it is intended for query planning
/// (split-point selection, cost-based join ordering), not exact accounting.
///
/// The derived `Default` is the empty-range value: both fields `0`.
#[must_use]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct ApproximateRangeStats {
    /// Estimated on-disk bytes occupied by the range across all overlapping
    /// SSTs (key + pointer + apportioned blob bytes) plus the active and sealed
    /// memtables' in-range share. `0` for an empty range.
    pub bytes: u64,

    /// Estimated number of entry versions in the range: the sum, over each
    /// overlapping SST, of `item_count × in-range fraction`, plus each
    /// memtable's in-range skiplist count. `0` for an empty range.
    pub key_count: u64,
}
141
/// Size and entry count of one stored segment (SST), for per-segment tiering and
/// erasure-coding placement decisions.
///
/// As built by `compute_level_segment_stats` in this module, `used_bytes`
/// comes from a file-size stat and `reads` / `last_access_secs` from relaxed
/// atomic loads of the table's counters, so the two read figures are a
/// best-effort snapshot rather than a mutually consistent pair.
#[must_use]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SegmentStats {
    /// Identifier of the segment's SST within its tree.
    pub table_id: crate::TableId,
    /// LSM level the segment lives in (`0` is the newest / smallest level).
    pub level: usize,
    /// Physical on-disk bytes of the segment's SST file.
    pub used_bytes: u64,
    /// Number of entry versions stored in the segment.
    pub item_count: u64,
    /// Cumulative point reads that consulted this segment's data since it was
    /// created: only reads that pass the segment's seqno-range and bloom gates
    /// count (a bloom miss is not counted), so this tracks data hotness rather
    /// than raw probe frequency. A monotonic counter, not a rate: derive a
    /// read-rate / EMA from the delta between successive polls. `0` when never
    /// read.
    pub reads: u64,
    /// Unix seconds of the segment's most recent data-consulting read, or `0` if
    /// never read (or on a no-std build, which keeps no clock).
    pub last_access_secs: u64,
}
166
/// Per-LSM-level size + entry aggregates with the contributing segments, for
/// tiering and erasure-coding placement (which level / segment is large enough
/// to demote, EC-encode, or migrate).
///
/// Cheap to read: derived from version metadata plus one file-size stat per
/// segment, never a data-block scan. The per-level totals reconcile with the
/// tree-level [`StorageStats`]: summed across levels they equal the SST portion
/// of [`StorageStats::used_bytes`] and [`StorageStats::item_count`] (blob files
/// are tracked separately).
///
/// Every aggregate is derived from the entries of [`Self::segments`]; see the
/// matching [`SegmentStats`] field for what each per-segment figure counts.
#[must_use]
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LevelStats {
    /// LSM level index (`0` is the newest / smallest level).
    pub level: usize,
    /// Number of segments (SSTs) in the level; equal to `segments.len()` as
    /// built by `compute_level_segment_stats`.
    pub segment_count: usize,
    /// Physical on-disk bytes summed across the level's segments.
    pub used_bytes: u64,
    /// Entry versions summed across the level's segments.
    pub item_count: u64,
    /// Cumulative data-consulting point reads summed across the level's
    /// segments (the sum of [`SegmentStats::reads`], saturating at `u64::MAX`).
    pub reads: u64,
    /// Most recent data-consulting point read across the level's segments (the
    /// maximum of [`SegmentStats::last_access_secs`]), in unix seconds, or `0`
    /// if none was ever read.
    pub last_access_secs: u64,
    /// Per-segment breakdown, in level (run / table) order.
    pub segments: Vec<SegmentStats>,
}
195
/// Approximate cardinality and selectivity of a key range, for cost-based query
/// planning (join ordering, scan-vs-seek).
///
/// Both figures derive from the per-data-block zone map (per-block row counts +
/// key ranges) when present, falling back to the byte-fraction estimate of
/// [`ApproximateRangeStats`] otherwise. They are estimates at block granularity,
/// never exact.
///
/// The derived `Default` is the empty value: `rows == 0`, `selectivity == 0.0`.
#[must_use]
// `Eq` is not derived (unlike the sibling stats types) because `selectivity`
// is an `f64`, which only implements `PartialEq`.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct RangeCardinality {
    /// Estimated number of rows (entry versions) the range covers: the sum of
    /// the per-block row counts of every data block whose key range overlaps the
    /// query range, plus each memtable's in-range count. `0` for an empty range.
    pub rows: u64,

    /// Estimated fraction of the tree's rows the range selects, in `0.0..=1.0`:
    /// `rows / total_rows`. Monotonic in predicate tightness (a narrower range
    /// never yields a larger selectivity). `0.0` when the tree is empty.
    pub selectivity: f64,
}
216
/// Grouped, object-safe read-only storage-statistics surface.
///
/// A coherent view over a tree's non-query statistics: on-disk footprint
/// ([`storage_stats`](Self::storage_stats)), per-level / per-segment sizing
/// ([`level_segment_stats`](Self::level_segment_stats)), compaction debt
/// ([`compaction_debt`](Self::compaction_debt)), and block-cache health
/// (`cache_stats`, behind the `metrics` feature). A planner / tiering / capacity consumer
/// bounds on `T: StorageStatistics` (or `&dyn StorageStatistics`) and a test can
/// supply a mock. Every [`AbstractTree`](crate::AbstractTree) implements it via a
/// blanket impl (`impl<T: AbstractTree + ?Sized> StorageStatistics for T`).
///
/// Object safety rests on the method shapes: every method takes `&self`, has
/// no generic parameters, and returns an owned value. Keep new methods to that
/// shape so `&dyn StorageStatistics` keeps working.
///
/// The per-query range estimators
/// ([`approximate_range_stats`](crate::AbstractTree::approximate_range_stats),
/// [`approximate_range_cardinality`](crate::AbstractTree::approximate_range_cardinality))
/// are generic over the range type and so not object-safe; they stay on
/// [`AbstractTree`](crate::AbstractTree) rather than joining this trait.
pub trait StorageStatistics {
    /// On-disk footprint and average entry shape: used / capacity / available
    /// bytes, item & table counts, average entry size, reclaimable-bytes
    /// estimate, and a coarse [`StorageStatus`]. See
    /// [`StorageStats::estimated_remaining_entries`] for a budget projection.
    ///
    /// # Examples
    ///
    /// ```
    /// # use lsm_tree::Error as TreeError;
    /// use lsm_tree::{AbstractTree, Config, StorageStatistics};
    ///
    /// let folder = tempfile::tempdir()?;
    /// let tree = Config::new(&folder, Default::default(), Default::default()).open()?;
    /// for i in 0..100u32 {
    ///     tree.insert(format!("k{i:04}"), "v", 0);
    /// }
    /// tree.flush_active_memtable(0)?;
    ///
    /// // Both traits are in scope, so disambiguate the shared method name.
    /// let stats = StorageStatistics::storage_stats(&tree)?;
    /// assert_eq!(stats.item_count, 100);
    /// // Roughly how many more average-shaped entries fit in another 1 MiB.
    /// let _headroom = stats.estimated_remaining_entries(1024 * 1024);
    /// #
    /// # Ok::<(), TreeError>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error if a live file's size cannot be stat-ed.
    fn storage_stats(&self) -> crate::Result<StorageStats>;

    /// Per-LSM-level and per-segment size + entry-count stats, for tiering and
    /// erasure-coding placement decisions (which level / segment is large enough
    /// to demote, EC-encode, or migrate).
    ///
    /// Cheap: derived from the live version's metadata plus one file-size stat
    /// per segment (no data-block scan). The per-level totals reconcile with
    /// [`storage_stats`](Self::storage_stats): summed across levels they equal
    /// the SST portion of [`StorageStats::used_bytes`] and
    /// [`StorageStats::item_count`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use lsm_tree::Error as TreeError;
    /// use lsm_tree::{AbstractTree, Config, StorageStatistics};
    ///
    /// let folder = tempfile::tempdir()?;
    /// let tree = Config::new(&folder, Default::default(), Default::default()).open()?;
    /// for i in 0..100u32 {
    ///     tree.insert(format!("k{i:04}"), "v", 0);
    /// }
    /// tree.flush_active_memtable(0)?;
    ///
    /// // Both traits are in scope, so disambiguate the shared method names.
    /// let levels = StorageStatistics::level_segment_stats(&tree)?;
    /// let total: u64 = levels.iter().map(|l| l.item_count).sum();
    /// assert_eq!(total, StorageStatistics::storage_stats(&tree)?.item_count);
    /// #
    /// # Ok::<(), TreeError>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error if a segment's file size cannot be stat-ed.
    fn level_segment_stats(&self) -> crate::Result<Vec<LevelStats>>;

    /// Estimated bytes pending compaction under `strategy`: on-disk data above
    /// its level's target that must eventually be rewritten downward (a `RocksDB`
    /// `estimate-pending-compaction-bytes` analog), a compaction-debt signal for a
    /// scheduler / tiering consumer.
    ///
    /// The strategy is a caller argument because the engine does not own a
    /// configured compaction strategy (it is injected per compaction run); a
    /// `&dyn` keeps this object-safe. Returns `0` for strategies without a
    /// size-target notion of debt (FIFO, drop-range), or when the tree is at or
    /// below its target shape. See
    /// [`CompactionStrategy::pending_compaction_bytes`](crate::compaction::CompactionStrategy::pending_compaction_bytes).
    fn compaction_debt(&self, strategy: &dyn crate::compaction::CompactionStrategy) -> u64;

    /// A point-in-time [`CacheStats`](crate::CacheStats) snapshot of block-cache
    /// effectiveness (cumulative hit / miss counts and rate) and occupancy
    /// (current size against capacity).
    ///
    /// The stable, owned observability view over the block cache, so a consumer
    /// reads cache health without holding the mutable
    /// [`metrics`](crate::AbstractTree::metrics) handle. Counts are cumulative
    /// since process start; derive a rate over an interval from the delta between
    /// two polls.
    ///
    /// Only part of the trait when the `metrics` feature is enabled, so
    /// implementors and mocks need the same `cfg` gate on their method.
    #[cfg(feature = "metrics")]
    fn cache_stats(&self) -> crate::CacheStats;
}
327
328/// Every [`AbstractTree`](crate::AbstractTree) is a [`StorageStatistics`] by
329/// delegating to its own inherent stats methods, so a `Tree` / `BlobTree` can be
330/// used directly as `&dyn StorageStatistics`. The logic lives once on
331/// `AbstractTree`; this is a thin object-safe re-exposure for the grouped /
332/// mockable surface (a test mock implements `StorageStatistics` directly without
333/// being an `AbstractTree`). When both traits are in scope, disambiguate a bare
334/// `tree.storage_stats()` with `StorageStatistics::storage_stats(&tree)`.
335impl<T: crate::AbstractTree + ?Sized> StorageStatistics for T {
336    fn storage_stats(&self) -> crate::Result<StorageStats> {
337        crate::AbstractTree::storage_stats(self)
338    }
339
340    fn level_segment_stats(&self) -> crate::Result<Vec<LevelStats>> {
341        crate::AbstractTree::level_segment_stats(self)
342    }
343
344    fn compaction_debt(&self, strategy: &dyn crate::compaction::CompactionStrategy) -> u64 {
345        crate::AbstractTree::compaction_debt(self, strategy)
346    }
347
348    #[cfg(feature = "metrics")]
349    fn cache_stats(&self) -> crate::CacheStats {
350        crate::AbstractTree::cache_stats(self)
351    }
352}
353
354impl StorageStats {
355    /// Approximately how many more average-shaped entries fit in `budget_bytes`,
356    /// using [`Self::avg_entry_on_disk_bytes`].
357    ///
358    /// Returns `0` when the average entry size is unknown (an empty tree), since
359    /// there is no basis for the estimate.
360    #[must_use]
361    pub fn estimated_remaining_entries(&self, budget_bytes: u64) -> u64 {
362        if self.avg_entry_on_disk_bytes == 0 {
363            0
364        } else {
365            budget_bytes / self.avg_entry_on_disk_bytes
366        }
367    }
368}
369
370/// Sums the true physical on-disk size of every live table and blob file in
371/// `version` (one metadata stat per file).
372///
373/// This is the same physical basis [`compute_storage_stats`] reports as
374/// `used_bytes` and that `Tree::create_checkpoint` totals, so the storage
375/// admission gate agrees with both. It deliberately does NOT use
376/// `Metadata::file_size` (undercounts by the meta block / footer) or
377/// `disk_space()` (metadata `Level::size`, which also omits blob files).
378///
379/// # Errors
380///
381/// Returns an error if a live table or blob file's size cannot be stat-ed.
382pub(crate) fn compute_used_bytes(version: &Version) -> crate::Result<u64> {
383    // Sum of on-disk file sizes, bounded by the filesystem capacity → cannot
384    // overflow u64; plain arithmetic.
385    let mut used_bytes = 0u64;
386    for table in version.iter_tables() {
387        used_bytes += table.fs.metadata(&table.path)?.len;
388    }
389    for blob in version.blob_files.iter() {
390        used_bytes += blob.0.fs.metadata(&blob.0.path)?.len;
391    }
392    Ok(used_bytes)
393}
394
395/// The transient-output bound a full compaction's space check uses: the largest
396/// level's on-disk size (the `full_compaction_bytes` gauge figure), an upper
397/// bound on a single merge's input set. `0` for an empty tree.
398///
399/// This is the DEMAND. The destination VOLUME is a separate concern: a full
400/// compaction writes its output to the last configured level
401/// (`level_count - 1`), not to whichever level is currently largest, so callers
402/// pass the last level as the destination to the per-volume space check (the two
403/// differ only under tiered routing, where they can be different filesystems).
404pub(crate) fn full_compaction_demand_bytes(version: &Version) -> u64 {
405    version
406        .iter_levels()
407        .map(crate::version::Level::size)
408        .max()
409        .unwrap_or(0)
410}
411
412/// Computes [`StorageStats`] from a live version's table + blob-file metadata.
413///
414/// `is_compacting` selects [`StorageStatus::CompactionInProgress`] vs
415/// [`StorageStatus::Healthy`]; the caller supplies it because compaction state
416/// is engine-internal.
417///
418/// `value_bytes_are_user_values` must be `false` for a KV-separated
419/// (`BlobTree`) tree: there the SST records a small indirection pointer per
420/// large value, not the user value, so the per-table value-byte sum measures
421/// pointers and the value average would misreport. When `false`,
422/// [`StorageStats::avg_value_bytes`] is forced to `None`. Key bytes are never
423/// separated, so [`StorageStats::avg_key_bytes`] stays exact either way.
424///
425/// `used_bytes` is the true on-disk file size of every live table and blob
426/// file (one metadata stat per file), not the writer's `Metadata::file_size`
427/// or `crate::version::Version::blob_files`' compressed-payload sum: those
428/// undercount the physical file by the meta block / footer / blob trailer.
429/// Statting matches the figure `Tree::create_checkpoint` reports, so the two
430/// agree on disk reality.
431///
432/// # Errors
433///
434/// Returns an error if a live table or blob file's size cannot be stat-ed.
435pub(crate) fn compute_storage_stats(
436    version: &Version,
437    is_compacting: bool,
438    value_bytes_are_user_values: bool,
439) -> crate::Result<StorageStats> {
440    let mut used_bytes = 0u64;
441    let mut item_count = 0u64;
442    let mut table_count = 0u64;
443    let mut reclaimable_entries = 0u64;
444    let mut sum_key = 0u64;
445    let mut sum_value = 0u64;
446    // The key/value split is only exact when EVERY live table records the byte
447    // sums; a single legacy table without them makes the average unrepresentable.
448    let mut all_have_shape = true;
449
450    // Every running total below is a sum of on-disk byte sizes or live item
451    // counts; both are bounded by the filesystem capacity / the live entry count
452    // and cannot overflow u64, so plain arithmetic is correct (a debug-overflow
453    // would itself signal a corrupt metadata read).
454    for table in version.iter_tables() {
455        let m = &table.metadata;
456        // Physical file size, NOT m.file_size (which undercounts — see above).
457        let on_disk = table.fs.metadata(&table.path)?.len;
458        used_bytes += on_disk;
459        item_count += m.item_count;
460        table_count += 1;
461        reclaimable_entries += m.weak_tombstone_reclaimable;
462        match (m.sum_user_key_bytes, m.sum_value_bytes) {
463            (Some(k), Some(v)) => {
464                sum_key += k;
465                sum_value += v;
466            }
467            _ => all_have_shape = false,
468        }
469    }
470
471    // Physical blob-file size (metadata + trailer included), NOT
472    // BlobFileList::on_disk_size() which sums only the compressed payload.
473    for blob in version.blob_files.iter() {
474        used_bytes += blob.0.fs.metadata(&blob.0.path)?.len;
475    }
476
477    let avg_entry_on_disk_bytes = if item_count == 0 {
478        0
479    } else {
480        used_bytes / item_count
481    };
482
483    let have_shape = all_have_shape && item_count > 0;
484    let avg_key_bytes = have_shape.then(|| sum_key / item_count);
485    // Value bytes are only meaningful when not KV-separated (see param doc).
486    let avg_value_bytes =
487        (have_shape && value_bytes_are_user_values).then(|| sum_value / item_count);
488
489    // reclaimable_entries ≤ item_count and avg_entry_on_disk_bytes = used / item_count,
490    // so the product is ≤ used_bytes (bounded by disk capacity): plain multiply.
491    let reclaimable_bytes_estimate = reclaimable_entries * avg_entry_on_disk_bytes;
492
493    // A full compaction's transient output is bounded by its input set; the
494    // largest single merge is bounded by the largest level's on-disk size, so
495    // that is the free space a full compaction needs.
496    let full_compaction_bytes = full_compaction_demand_bytes(version);
497    // A minimal (tight) space-reclaiming merge needs only the reserved working
498    // floor to make forward progress.
499    let tight_compaction_bytes = crate::tree::MIN_RESERVED_HEADROOM;
500
501    let status = if is_compacting {
502        StorageStatus::CompactionInProgress
503    } else {
504        StorageStatus::Healthy
505    };
506
507    Ok(StorageStats {
508        used_bytes,
509        // Capacity is disk-aware (quota + free-space probe) and lives at the
510        // tree layer; this version-only computation leaves it unbounded. The
511        // caller (`Tree::storage_stats`) fills the real figures.
512        capacity_bytes: None,
513        available_bytes: None,
514        compaction_possible: true,
515        full_compaction_bytes,
516        tight_compaction_bytes,
517        item_count,
518        table_count,
519        avg_entry_on_disk_bytes,
520        avg_key_bytes,
521        avg_value_bytes,
522        reclaimable_bytes_estimate,
523        status,
524    })
525}
526
527/// Computes per-LSM-level and per-segment size + entry stats from a version.
528///
529/// Cost is O(levels x segments) plus one file-size stat per segment (the same
530/// stat [`compute_storage_stats`] already performs); it never reads a data block.
531///
532/// # Errors
533///
534/// Returns an error if a segment's file size cannot be stat-ed.
535pub(crate) fn compute_level_segment_stats(version: &Version) -> crate::Result<Vec<LevelStats>> {
536    use core::sync::atomic::Ordering::Relaxed;
537    let mut levels = Vec::with_capacity(version.level_count());
538    for (level, run_group) in version.iter_levels().enumerate() {
539        let mut segments = Vec::new();
540        let mut used_bytes = 0u64;
541        let mut item_count = 0u64;
542        let mut reads = 0u64;
543        let mut last_access_secs = 0u64;
544        for run in run_group.iter() {
545            for table in run.iter() {
546                // Physical file size, NOT m.file_size (which undercounts), to
547                // reconcile with the tree-level `used_bytes`.
548                let on_disk = table.fs.metadata(&table.path)?.len;
549                let items = table.metadata.item_count;
550                let seg_reads = table.read_count.load(Relaxed);
551                let seg_access = table.last_access_secs.load(Relaxed);
552                used_bytes += on_disk;
553                item_count += items;
554                reads = reads.saturating_add(seg_reads);
555                last_access_secs = last_access_secs.max(seg_access);
556                segments.push(SegmentStats {
557                    table_id: table.metadata.id,
558                    level,
559                    used_bytes: on_disk,
560                    item_count: items,
561                    reads: seg_reads,
562                    last_access_secs: seg_access,
563                });
564            }
565        }
566        levels.push(LevelStats {
567            level,
568            segment_count: segments.len(),
569            used_bytes,
570            item_count,
571            reads,
572            last_access_secs,
573            segments,
574        });
575    }
576    Ok(levels)
577}
578
579#[cfg(test)]
580mod tests;