Skip to main content

rust_rocksdb/
perf.rs

1// Copyright 2020 Tran Tuan Linh
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use libc::{c_int, c_uchar};
16use std::marker::PhantomData;
17
18use crate::cache::Cache;
19use crate::ffi_util::from_cstr_and_free;
20use crate::{DB, DBCommon, ThreadMode, TransactionDB};
21use crate::{Error, db::DBInner, ffi};
22
/// Level of detail of the performance statistics RocksDB collects.
///
/// A level is passed to [`set_perf_stats`], which casts it to a C integer, so
/// the discriminants spelled out below are the values RocksDB receives.
// NOTE(review): confirm these discriminants match the `PerfLevel` enum of the
// RocksDB version this crate links against.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(i32)]
pub enum PerfStatsLevel {
    /// Unknown settings
    Uninitialized = 0,
    /// Disable perf stats
    Disable = 1,
    /// Enables only count stats
    EnableCount = 2,
    /// Count stats and enable time stats except for mutexes
    EnableTimeExceptForMutex = 3,
    /// Other than time, also measure CPU time counters. Still don't measure
    /// time (neither wall time nor CPU time) for mutexes
    EnableTimeAndCPUTimeExceptForMutex = 4,
    /// Enables count and time stats
    EnableTime = 5,
    /// N.B must always be the last value!
    OutOfBound = 6,
}
42
43// Include the generated PerfMetric enum from perf_enum.rs
44include!("perf_enum.rs");
45
46/// Sets the perf stats level for current thread.
47pub fn set_perf_stats(lvl: PerfStatsLevel) {
48    unsafe {
49        ffi::rocksdb_set_perf_level(lvl as c_int);
50    }
51}
52
/// Thread local context for gathering performance counter efficiently
/// and transparently.
///
/// A new context is obtained through [`Default::default`]; the wrapped handle
/// is destroyed when the value is dropped.
pub struct PerfContext {
    // Raw handle returned by `rocksdb_perfcontext_create`; released with
    // `rocksdb_perfcontext_destroy` in the `Drop` impl.
    pub(crate) inner: *mut ffi::rocksdb_perfcontext_t,
}
58
59impl Default for PerfContext {
60    fn default() -> Self {
61        let ctx = unsafe { ffi::rocksdb_perfcontext_create() };
62        assert!(!ctx.is_null(), "Could not create Perf Context");
63
64        Self { inner: ctx }
65    }
66}
67
68impl Drop for PerfContext {
69    fn drop(&mut self) {
70        unsafe {
71            ffi::rocksdb_perfcontext_destroy(self.inner);
72        }
73    }
74}
75
76impl PerfContext {
77    /// Reset context
78    pub fn reset(&mut self) {
79        unsafe {
80            ffi::rocksdb_perfcontext_reset(self.inner);
81        }
82    }
83
84    /// Get the report on perf
85    pub fn report(&self, exclude_zero_counters: bool) -> String {
86        unsafe {
87            let ptr =
88                ffi::rocksdb_perfcontext_report(self.inner, c_uchar::from(exclude_zero_counters));
89            from_cstr_and_free(ptr)
90        }
91    }
92
93    /// Returns value of a metric
94    pub fn metric(&self, id: PerfMetric) -> u64 {
95        unsafe { ffi::rocksdb_perfcontext_metric(self.inner, id as c_int) }
96    }
97}
98
/// Memory usage stats
///
/// Plain-data snapshot of the approximate memory figures, as filled in by
/// `get_memory_usage_stats`. The type is cheap to copy, can be compared, and
/// can be debug-printed; `Default` yields all-zero counters.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct MemoryUsageStats {
    /// Approximate memory usage of all the mem-tables
    pub mem_table_total: u64,
    /// Approximate memory usage of un-flushed mem-tables
    pub mem_table_unflushed: u64,
    /// Approximate memory usage of all the table readers
    pub mem_table_readers_total: u64,
    /// Approximate memory usage by cache
    pub cache_total: u64,
}
110
/// Wrap over memory_usage_t. Hold current memory usage of the specified DB instances and caches
///
/// Values of this type are produced by `MemoryUsageBuilder::build`; the
/// wrapped handle is destroyed when the value is dropped.
pub struct MemoryUsage {
    // Raw handle returned by `rocksdb_approximate_memory_usage_create`.
    inner: *mut ffi::rocksdb_memory_usage_t,
}
115
116impl Drop for MemoryUsage {
117    fn drop(&mut self) {
118        unsafe {
119            ffi::rocksdb_approximate_memory_usage_destroy(self.inner);
120        }
121    }
122}
123
124impl MemoryUsage {
125    /// Approximate memory usage of all the mem-tables
126    pub fn approximate_mem_table_total(&self) -> u64 {
127        unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_total(self.inner) }
128    }
129
130    /// Approximate memory usage of un-flushed mem-tables
131    pub fn approximate_mem_table_unflushed(&self) -> u64 {
132        unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_unflushed(self.inner) }
133    }
134
135    /// Approximate memory usage of all the table readers
136    pub fn approximate_mem_table_readers_total(&self) -> u64 {
137        unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_readers_total(self.inner) }
138    }
139
140    /// Approximate memory usage by cache
141    pub fn approximate_cache_total(&self) -> u64 {
142        unsafe { ffi::rocksdb_approximate_memory_usage_get_cache_total(self.inner) }
143    }
144}
145
/// Creates [`MemoryUsage`] from DBs and caches.
///
/// Most users should call [`get_memory_usage_stats`] instead.
///
/// DBs and caches are registered with `add_db`, `add_tx_db` and `add_cache`;
/// `build` then produces the [`MemoryUsage`] snapshot.
///
/// A `MemoryUsageBuilder` must not outlive the `DB`s added to it:
///
/// ```compile_fail,E0597
/// use rust_rocksdb::{perf::MemoryUsageBuilder, DB};
///
/// let mut builder = MemoryUsageBuilder::new().unwrap();
/// {
///     let db = DB::open_default("foo").unwrap();
///     builder.add_db(&db);
/// }
/// let _memory_usage = builder.build().unwrap();
/// ```
pub struct MemoryUsageBuilder<'a> {
    // Raw handle returned by `rocksdb_memory_consumers_create`.
    inner: *mut ffi::rocksdb_memory_consumers_t,
    // Base-DB handles obtained in `add_tx_db`; each one is closed again when
    // the builder is dropped.
    base_dbs: Vec<*mut ffi::rocksdb_t>,
    // must not outlive the DBs/caches that are added
    _marker: PhantomData<&'a ()>,
}
168
169impl Drop for MemoryUsageBuilder<'_> {
170    fn drop(&mut self) {
171        unsafe {
172            ffi::rocksdb_memory_consumers_destroy(self.inner);
173        }
174        for base_db in &self.base_dbs {
175            unsafe {
176                ffi::rocksdb_transactiondb_close_base_db(*base_db);
177            }
178        }
179    }
180}
181
182impl<'a> MemoryUsageBuilder<'a> {
183    /// Create new instance
184    pub fn new() -> Result<Self, Error> {
185        let mc = unsafe { ffi::rocksdb_memory_consumers_create() };
186        if mc.is_null() {
187            Err(Error::new(
188                "Could not create MemoryUsage builder".to_owned(),
189            ))
190        } else {
191            Ok(Self {
192                inner: mc,
193                base_dbs: Vec::new(),
194                _marker: PhantomData,
195            })
196        }
197    }
198
199    /// Add a DB instance to collect memory usage from it and add up in total stats
200    pub fn add_tx_db<T: ThreadMode>(&mut self, db: &'a TransactionDB<T>) {
201        unsafe {
202            let base_db = ffi::rocksdb_transactiondb_get_base_db(db.inner);
203            ffi::rocksdb_memory_consumers_add_db(self.inner, base_db);
204            // rocksdb_transactiondb_get_base_db allocates a struct that must be freed
205            self.base_dbs.push(base_db);
206        }
207    }
208
209    /// Add a DB instance to collect memory usage from it and add up in total stats
210    pub fn add_db<T: ThreadMode, D: DBInner>(&mut self, db: &'a DBCommon<T, D>) {
211        unsafe {
212            ffi::rocksdb_memory_consumers_add_db(self.inner, db.inner.inner());
213        }
214    }
215
216    /// Add a cache to collect memory usage from it and add up in total stats
217    pub fn add_cache(&mut self, cache: &'a Cache) {
218        unsafe {
219            ffi::rocksdb_memory_consumers_add_cache(self.inner, cache.0.inner.as_ptr());
220        }
221    }
222
223    /// Build up MemoryUsage
224    pub fn build(&self) -> Result<MemoryUsage, Error> {
225        unsafe {
226            let mu = ffi_try!(ffi::rocksdb_approximate_memory_usage_create(self.inner));
227            Ok(MemoryUsage { inner: mu })
228        }
229    }
230}
231
232/// Get memory usage stats from DB instances and Cache instances
233pub fn get_memory_usage_stats(
234    dbs: Option<&[&DB]>,
235    caches: Option<&[&Cache]>,
236) -> Result<MemoryUsageStats, Error> {
237    let mut builder = MemoryUsageBuilder::new()?;
238    if let Some(dbs_) = dbs {
239        for db in dbs_ {
240            builder.add_db(db);
241        }
242    }
243    if let Some(caches_) = caches {
244        for cache in caches_ {
245            builder.add_cache(cache);
246        }
247    }
248
249    let mu = builder.build()?;
250    Ok(MemoryUsageStats {
251        mem_table_total: mu.approximate_mem_table_total(),
252        mem_table_unflushed: mu.approximate_mem_table_unflushed(),
253        mem_table_readers_total: mu.approximate_mem_table_readers_total(),
254        cache_total: mu.approximate_cache_total(),
255    })
256}
257
// Unit tests for the perf context wrappers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DB, Options};
    use tempfile::TempDir;

    /// Exercises `PerfContext` against a real DB: a forward scan at the
    /// `EnableCount` level, a `reset`, then a backward scan at `EnableTime`.
    ///
    /// The statement order matters: the perf level is changed, the context is
    /// created and reset, and the scans run at specific points relative to
    /// each other.
    #[test]
    fn test_perf_context_with_db_operations() {
        let temp_dir = TempDir::new().unwrap();
        let mut opts = Options::default();
        opts.create_if_missing(true);
        let db = DB::open(&opts, temp_dir.path()).unwrap();

        // Insert data with deletions to test internal key/delete skipping:
        // every key 0..n is written, and the even ones are deleted again.
        let n = 10;
        for i in 0..n {
            let k = vec![i as u8];
            db.put(&k, &k).unwrap();
            if i % 2 == 0 {
                db.delete(&k).unwrap();
            }
        }

        // Enable count-only stats before creating the context and scanning.
        set_perf_stats(PerfStatsLevel::EnableCount);
        let mut ctx = PerfContext::default();

        // Use iterator with explicit seek to trigger metrics
        let mut iter = db.raw_iterator();
        iter.seek_to_first();
        let mut valid_count = 0;
        while iter.valid() {
            valid_count += 1;
            iter.next();
        }

        // Check counts - should have 5 valid entries (odd numbers: 1,3,5,7,9)
        assert_eq!(
            valid_count, 5,
            "Iterator should find 5 valid entries (odd numbers)"
        );

        // Check internal skip metrics
        let internal_key_skipped = ctx.metric(PerfMetric::InternalKeySkippedCount);
        let internal_delete_skipped = ctx.metric(PerfMetric::InternalDeleteSkippedCount);

        // The forward scan has to step over the keys deleted above:
        // - We should skip the deletion markers (n/2 = 5 deletes)
        // - Total internal keys skipped should be >= number of deletions
        assert!(
            internal_key_skipped >= (n / 2) as u64,
            "internal_key_skipped ({}) should be >= {} (deletions)",
            internal_key_skipped,
            n / 2
        );
        assert_eq!(
            internal_delete_skipped,
            (n / 2) as u64,
            "internal_delete_skipped ({internal_delete_skipped}) should equal {} (deleted entries)",
            n / 2
        );
        // Only count stats are enabled at this point, so the time metric stays 0.
        assert_eq!(
            ctx.metric(PerfMetric::SeekInternalSeekTime),
            0,
            "Time metrics should be 0 with EnableCount"
        );

        // Test reset: both skip counters must read 0 afterwards
        ctx.reset();
        assert_eq!(ctx.metric(PerfMetric::InternalKeySkippedCount), 0);
        assert_eq!(ctx.metric(PerfMetric::InternalDeleteSkippedCount), 0);

        // Change perf level to EnableTime
        set_perf_stats(PerfStatsLevel::EnableTime);

        // Iterate backwards
        let mut iter = db.raw_iterator();
        iter.seek_to_last();
        let mut backward_count = 0;
        while iter.valid() {
            backward_count += 1;
            iter.prev();
        }
        assert_eq!(
            backward_count, 5,
            "Backward iteration should also find 5 valid entries"
        );

        // Read the metrics recorded for the second (backward) iteration
        let key_skipped_after = ctx.metric(PerfMetric::InternalKeySkippedCount);
        let delete_skipped_after = ctx.metric(PerfMetric::InternalDeleteSkippedCount);

        // The context was reset before the backward scan, so these counters
        // cover that scan only; it must skip at least as many internal keys as
        // the forward scan did, and exactly the same number of deletions.
        assert!(
            key_skipped_after >= internal_key_skipped,
            "After second iteration, internal_key_skipped ({key_skipped_after}) should be >= first iteration ({internal_key_skipped})",
        );
        assert_eq!(
            delete_skipped_after,
            (n / 2) as u64,
            "internal_delete_skipped should still be {} after second iteration",
            n / 2
        );

        // Disable perf stats
        set_perf_stats(PerfStatsLevel::Disable);
    }
}