Skip to main content

d_engine_server/storage/adaptors/rocksdb/
mod.rs

1mod rocksdb_state_machine;
2mod rocksdb_storage_engine;
3mod rocksdb_unified_engine;
4
5pub use rocksdb_state_machine::*;
6pub use rocksdb_storage_engine::*;
7pub use rocksdb_unified_engine::RocksDBUnifiedEngine;
8
9#[cfg(test)]
10mod rocksdb_state_machine_test;
11
12#[cfg(test)]
13mod rocksdb_storage_engine_test;
14
15#[cfg(test)]
16mod rocksdb_unified_engine_test;
17
18// Column family names — single source of truth for all RocksDB adaptors
// NOTE(review): the purpose stated for each name below is inferred from the identifier and
// from the docs on the `*_cf_options` functions in this module — confirm against the adaptors.
/// Column family name for log data (presumably the CF tuned by `log_cf_options`).
19pub(super) const LOG_CF: &str = "logs";
/// Column family name for metadata (presumably the CF tuned by `meta_cf_options`).
20pub(super) const META_CF: &str = "meta";
/// Column family name for state-machine data (presumably the CF tuned by `sm_cf_options`).
21pub(super) const STATE_MACHINE_CF: &str = "state_machine";
/// Column family name for state-machine metadata (presumably also tuned by `meta_cf_options`).
22pub(super) const STATE_MACHINE_META_CF: &str = "state_machine_meta";
23
24// ── Shared DB + CF option functions ──────────────────────────────────────────
25// Both One DB and Two DB paths call these functions so tuning changes apply uniformly.
26
27use rocksdb::BlockBasedOptions;
28use rocksdb::Cache;
29use rocksdb::DBCompactionStyle;
30use rocksdb::Options;
31
32/// Base DB-level options shared by all RocksDB adaptors.
33///
34/// Each adaptor may override individual settings (e.g. `max_background_jobs`,
35/// `set_db_write_buffer_size`) after calling this function.
36pub(super) fn base_db_options() -> Options {
37    let mut opts = Options::default();
38    opts.create_if_missing(true);
39    opts.create_missing_column_families(true);
40    // IO-smoothing hint: triggers async sync_file_range every 1 MB of WAL writes.
41    // This is NOT fdatasync — it spreads dirty page writeback to avoid IO spikes,
42    // but provides no durability guarantee. Pairs well with Level 3 (flush_wal) if added.
43    opts.set_wal_bytes_per_sync(1024 * 1024);
44    opts.set_max_background_jobs(4);
45    opts.set_max_open_files(5000);
46    opts.set_use_direct_io_for_flush_and_compaction(true);
47    opts.set_use_direct_reads(true);
48    opts.set_level_compaction_dynamic_level_bytes(true);
49    opts.set_target_file_size_base(64 * 1024 * 1024);
50    opts.set_max_bytes_for_level_base(256 * 1024 * 1024);
51    opts
52}
53
54/// Log CF: sequential writes, range reads, prefix truncation.
55/// Universal Compaction reduces write amplification for append-only + bulk-delete workload.
56pub(super) fn log_cf_options(cache: &Cache) -> Options {
57    let mut opts = Options::default();
58    opts.set_write_buffer_size(128 * 1024 * 1024);
59    opts.set_max_write_buffer_number(4);
60    opts.set_min_write_buffer_number_to_merge(2);
61    opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
62    opts.set_bottommost_compression_type(rocksdb::DBCompressionType::Zstd);
63    opts.set_compression_options(-14, 0, 0, 0);
64    opts.set_compaction_style(DBCompactionStyle::Universal);
65
66    let mut bb = BlockBasedOptions::default();
67    bb.set_block_cache(cache);
68    opts.set_block_based_table_factory(&bb);
69    opts
70}
71
72/// SM CF: high-frequency random reads/writes (user KV data).
73pub(super) fn sm_cf_options(cache: &Cache) -> Options {
74    let mut opts = Options::default();
75    opts.set_write_buffer_size(64 * 1024 * 1024);
76    opts.set_max_write_buffer_number(4);
77    opts.set_min_write_buffer_number_to_merge(2);
78    opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
79    opts.set_bottommost_compression_type(rocksdb::DBCompressionType::Zstd);
80
81    let mut bb = BlockBasedOptions::default();
82    bb.set_block_cache(cache);
83    bb.set_bloom_filter(10.0, false);
84    bb.set_cache_index_and_filter_blocks(true);
85    opts.set_block_based_table_factory(&bb);
86    opts
87}
88
89/// Meta CF: low-frequency point reads/writes (term, vote, applied index, snapshot metadata).
90pub(super) fn meta_cf_options(cache: &Cache) -> Options {
91    let mut bb = BlockBasedOptions::default();
92    bb.set_block_cache(cache);
93    bb.set_bloom_filter(10.0, false);
94    let mut opts = Options::default();
95    opts.set_block_based_table_factory(&bb);
96    opts
97}