rust_rocksdb/
perf.rs

1// Copyright 2020 Tran Tuan Linh
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use libc::{c_int, c_uchar, c_void};
16
17use crate::cache::Cache;
18use crate::{db::DBInner, ffi, ffi_util::from_cstr, Error};
19use crate::{DBCommon, ThreadMode, TransactionDB, DB};
20
/// Level of detail of the perf statistics gathered for the current thread.
///
/// A variant is converted to its integer discriminant when it is handed to
/// RocksDB (see [`set_perf_stats`]), so the numeric values below are part of
/// the contract and are spelled out explicitly.
#[repr(i32)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PerfStatsLevel {
    /// Unknown settings
    Uninitialized = 0,
    /// Disable perf stats
    Disable = 1,
    /// Enables only count stats
    EnableCount = 2,
    /// Count stats and enable time stats except for mutexes
    EnableTimeExceptForMutex = 3,
    /// Other than time, also measure CPU time counters. Still don't measure
    /// time (neither wall time nor CPU time) for mutexes
    EnableTimeAndCPUTimeExceptForMutex = 4,
    /// Enables count and time stats
    EnableTime = 5,
    /// N.B must always be the last value!
    OutOfBound = 6,
}
40
41// Include the generated PerfMetric enum from perf_enum.rs
42include!("perf_enum.rs");
43
44/// Sets the perf stats level for current thread.
45pub fn set_perf_stats(lvl: PerfStatsLevel) {
46    unsafe {
47        ffi::rocksdb_set_perf_level(lvl as c_int);
48    }
49}
50
51/// Thread local context for gathering performance counter efficiently
52/// and transparently.
53pub struct PerfContext {
54    pub(crate) inner: *mut ffi::rocksdb_perfcontext_t,
55}
56
57impl Default for PerfContext {
58    fn default() -> Self {
59        let ctx = unsafe { ffi::rocksdb_perfcontext_create() };
60        assert!(!ctx.is_null(), "Could not create Perf Context");
61
62        Self { inner: ctx }
63    }
64}
65
66impl Drop for PerfContext {
67    fn drop(&mut self) {
68        unsafe {
69            ffi::rocksdb_perfcontext_destroy(self.inner);
70        }
71    }
72}
73
74impl PerfContext {
75    /// Reset context
76    pub fn reset(&mut self) {
77        unsafe {
78            ffi::rocksdb_perfcontext_reset(self.inner);
79        }
80    }
81
82    /// Get the report on perf
83    pub fn report(&self, exclude_zero_counters: bool) -> String {
84        unsafe {
85            let ptr =
86                ffi::rocksdb_perfcontext_report(self.inner, c_uchar::from(exclude_zero_counters));
87            let report = from_cstr(ptr);
88            ffi::rocksdb_free(ptr as *mut c_void);
89            report
90        }
91    }
92
93    /// Returns value of a metric
94    pub fn metric(&self, id: PerfMetric) -> u64 {
95        unsafe { ffi::rocksdb_perfcontext_metric(self.inner, id as c_int) }
96    }
97}
98
/// Memory usage stats
///
/// Plain snapshot of four counters; it holds no handles, so it can be freely
/// copied, compared and printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct MemoryUsageStats {
    /// Approximate memory usage of all the mem-tables
    pub mem_table_total: u64,
    /// Approximate memory usage of un-flushed mem-tables
    pub mem_table_unflushed: u64,
    /// Approximate memory usage of all the table readers
    pub mem_table_readers_total: u64,
    /// Approximate memory usage by cache
    pub cache_total: u64,
}
110
111/// Wrap over memory_usage_t. Hold current memory usage of the specified DB instances and caches
112pub struct MemoryUsage {
113    inner: *mut ffi::rocksdb_memory_usage_t,
114}
115
116impl Drop for MemoryUsage {
117    fn drop(&mut self) {
118        unsafe {
119            ffi::rocksdb_approximate_memory_usage_destroy(self.inner);
120        }
121    }
122}
123
124impl MemoryUsage {
125    /// Approximate memory usage of all the mem-tables
126    pub fn approximate_mem_table_total(&self) -> u64 {
127        unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_total(self.inner) }
128    }
129
130    /// Approximate memory usage of un-flushed mem-tables
131    pub fn approximate_mem_table_unflushed(&self) -> u64 {
132        unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_unflushed(self.inner) }
133    }
134
135    /// Approximate memory usage of all the table readers
136    pub fn approximate_mem_table_readers_total(&self) -> u64 {
137        unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_readers_total(self.inner) }
138    }
139
140    /// Approximate memory usage by cache
141    pub fn approximate_cache_total(&self) -> u64 {
142        unsafe { ffi::rocksdb_approximate_memory_usage_get_cache_total(self.inner) }
143    }
144}
145
146/// Builder for MemoryUsage
147pub struct MemoryUsageBuilder {
148    inner: *mut ffi::rocksdb_memory_consumers_t,
149}
150
151impl Drop for MemoryUsageBuilder {
152    fn drop(&mut self) {
153        unsafe {
154            ffi::rocksdb_memory_consumers_destroy(self.inner);
155        }
156    }
157}
158
159impl MemoryUsageBuilder {
160    /// Create new instance
161    pub fn new() -> Result<Self, Error> {
162        let mc = unsafe { ffi::rocksdb_memory_consumers_create() };
163        if mc.is_null() {
164            Err(Error::new(
165                "Could not create MemoryUsage builder".to_owned(),
166            ))
167        } else {
168            Ok(Self { inner: mc })
169        }
170    }
171
172    /// Add a DB instance to collect memory usage from it and add up in total stats
173    pub fn add_tx_db<T: ThreadMode>(&mut self, db: &TransactionDB<T>) {
174        unsafe {
175            let base = ffi::rocksdb_transactiondb_get_base_db(db.inner);
176            ffi::rocksdb_memory_consumers_add_db(self.inner, base);
177        }
178    }
179
180    /// Add a DB instance to collect memory usage from it and add up in total stats
181    pub fn add_db<T: ThreadMode, D: DBInner>(&mut self, db: &DBCommon<T, D>) {
182        unsafe {
183            ffi::rocksdb_memory_consumers_add_db(self.inner, db.inner.inner());
184        }
185    }
186
187    /// Add a cache to collect memory usage from it and add up in total stats
188    pub fn add_cache(&mut self, cache: &Cache) {
189        unsafe {
190            ffi::rocksdb_memory_consumers_add_cache(self.inner, cache.0.inner.as_ptr());
191        }
192    }
193
194    /// Build up MemoryUsage
195    pub fn build(&self) -> Result<MemoryUsage, Error> {
196        unsafe {
197            let mu = ffi_try!(ffi::rocksdb_approximate_memory_usage_create(self.inner));
198            Ok(MemoryUsage { inner: mu })
199        }
200    }
201}
202
203/// Get memory usage stats from DB instances and Cache instances
204pub fn get_memory_usage_stats(
205    dbs: Option<&[&DB]>,
206    caches: Option<&[&Cache]>,
207) -> Result<MemoryUsageStats, Error> {
208    let mut builder = MemoryUsageBuilder::new()?;
209    if let Some(dbs_) = dbs {
210        for db in dbs_ {
211            builder.add_db(db);
212        }
213    }
214    if let Some(caches_) = caches {
215        for cache in caches_ {
216            builder.add_cache(cache);
217        }
218    }
219
220    let mu = builder.build()?;
221    Ok(MemoryUsageStats {
222        mem_table_total: mu.approximate_mem_table_total(),
223        mem_table_unflushed: mu.approximate_mem_table_unflushed(),
224        mem_table_readers_total: mu.approximate_mem_table_readers_total(),
225        cache_total: mu.approximate_cache_total(),
226    })
227}
228
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Options, DB};
    use tempfile::TempDir;

    /// Writes ten keys, deletes every even one, then checks the skip counters
    /// reported by `PerfContext` for a forward and a backward scan.
    #[test]
    fn test_perf_context_with_db_operations() {
        let dir = TempDir::new().unwrap();
        let mut options = Options::default();
        options.create_if_missing(true);
        let db = DB::open(&options, dir.path()).unwrap();

        // Single-byte keys 0..10; even keys are deleted right after being
        // written so the scans below have deletions to skip over.
        let n: u8 = 10;
        let expected_deletes = u64::from(n / 2);
        for byte in 0..n {
            let key = [byte];
            db.put(key, key).unwrap();
            if byte % 2 == 0 {
                db.delete(key).unwrap();
            }
        }

        set_perf_stats(PerfStatsLevel::EnableCount);
        let mut ctx = PerfContext::default();

        // Forward scan from the first key.
        let mut forward = db.raw_iterator();
        forward.seek_to_first();
        let mut valid_count = 0;
        while forward.valid() {
            forward.next();
            valid_count += 1;
        }

        // Only the odd keys (1, 3, 5, 7, 9) survive.
        assert_eq!(
            valid_count, 5,
            "Iterator should find 5 valid entries (odd numbers)"
        );

        // Snapshot the skip counters produced by the forward scan.
        let internal_key_skipped = ctx.metric(PerfMetric::InternalKeySkippedCount);
        let internal_delete_skipped = ctx.metric(PerfMetric::InternalDeleteSkippedCount);

        // At least one internal key is skipped per deletion, and exactly one
        // delete is skipped for each deleted key.
        assert!(
            internal_key_skipped >= expected_deletes,
            "internal_key_skipped ({}) should be >= {} (deletions)",
            internal_key_skipped,
            n / 2
        );
        assert_eq!(
            internal_delete_skipped,
            expected_deletes,
            "internal_delete_skipped ({internal_delete_skipped}) should equal {} (deleted entries)",
            n / 2
        );
        assert_eq!(
            ctx.metric(PerfMetric::SeekInternalSeekTime),
            0,
            "Time metrics should be 0 with EnableCount"
        );

        // Resetting must zero the counters again.
        ctx.reset();
        assert_eq!(ctx.metric(PerfMetric::InternalKeySkippedCount), 0);
        assert_eq!(ctx.metric(PerfMetric::InternalDeleteSkippedCount), 0);

        // Switch to the most detailed level for the second scan.
        set_perf_stats(PerfStatsLevel::EnableTime);

        // Backward scan from the last key.
        let mut backward = db.raw_iterator();
        backward.seek_to_last();
        let mut backward_count = 0;
        while backward.valid() {
            backward.prev();
            backward_count += 1;
        }
        assert_eq!(
            backward_count, 5,
            "Backward iteration should also find 5 valid entries"
        );

        // Counters gathered since the reset, i.e. by the backward scan.
        let key_skipped_after = ctx.metric(PerfMetric::InternalKeySkippedCount);
        let delete_skipped_after = ctx.metric(PerfMetric::InternalDeleteSkippedCount);

        assert!(
            key_skipped_after >= internal_key_skipped,
            "After second iteration, internal_key_skipped ({key_skipped_after}) should be >= first iteration ({internal_key_skipped})",
        );
        assert_eq!(
            delete_skipped_after,
            expected_deletes,
            "internal_delete_skipped should still be {} after second iteration",
            n / 2
        );

        // Leave the thread with perf stats switched off.
        set_perf_stats(PerfStatsLevel::Disable);
    }
}