// lsm_tree/config/mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5mod block_size;
6mod compression;
7mod filter;
8mod hash_ratio;
9mod pinning;
10mod restart_interval;
11
12pub use block_size::BlockSizePolicy;
13pub use compression::CompressionPolicy;
14pub use filter::{BloomConstructionPolicy, FilterPolicy, FilterPolicyEntry};
15pub use hash_ratio::HashRatioPolicy;
16pub use pinning::PinningPolicy;
17pub use restart_interval::RestartIntervalPolicy;
18
19/// Partitioning policy for indexes and filters
20pub type PartitioningPolicy = PinningPolicy;
21
22use crate::{
23    compaction::filter::Factory,
24    comparator::{self, SharedComparator},
25    encryption::EncryptionProvider,
26    fs::{Fs, StdFs},
27    merge_operator::MergeOperator,
28    path::absolute_path,
29    prefix::PrefixExtractor,
30    version::DEFAULT_LEVEL_COUNT,
31    AnyTree, BlobTree, Cache, CompressionType, DescriptorTable, SequenceNumberCounter,
32    SharedSequenceNumberGenerator, Tree,
33};
34use std::{
35    path::{Path, PathBuf},
36    sync::Arc,
37};
38
/// LSM-tree type
///
/// Persisted as a single byte (see the `From<TreeType> for u8` and
/// `TryFrom<u8>` impls below), so the variant encoding must stay stable.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TreeType {
    /// Standard LSM-tree, see [`Tree`]
    Standard,

    /// Key-value separated LSM-tree, see [`BlobTree`]
    Blob,
}
48
49impl From<TreeType> for u8 {
50    fn from(val: TreeType) -> Self {
51        match val {
52            TreeType::Standard => 0,
53            TreeType::Blob => 1,
54        }
55    }
56}
57
58impl TryFrom<u8> for TreeType {
59    type Error = ();
60
61    fn try_from(value: u8) -> Result<Self, Self::Error> {
62        match value {
63            0 => Ok(Self::Standard),
64            1 => Ok(Self::Blob),
65            _ => Err(()),
66        }
67    }
68}
69
/// Default on-disk folder used when constructing a [`Config`] via `Default`.
const DEFAULT_FILE_FOLDER: &str = ".lsm.data";
71
/// Options for key-value separation
///
/// Key-value separation stores large values in separate blob files,
/// keeping the LSM-tree itself small. See [`BlobTree`].
#[derive(Clone, Debug, PartialEq)]
pub struct KvSeparationOptions {
    /// What type of compression is used for blobs
    #[doc(hidden)]
    pub compression: CompressionType,

    /// Blob file target size in bytes
    #[doc(hidden)]
    pub file_target_size: u64,

    /// Key-value separation threshold in bytes
    ///
    /// Values at least this large are stored in blob files.
    #[doc(hidden)]
    pub separation_threshold: u32,

    // Ratio in [0, 1]: how fragmented a blob file must be before it is
    // considered for garbage collection (see `staleness_threshold()`).
    #[doc(hidden)]
    pub staleness_threshold: f32,

    // Ratio in [0, 1]: age cutoff used by garbage collection
    // (see `age_cutoff()`).
    #[doc(hidden)]
    pub age_cutoff: f32,
}
93
94impl Default for KvSeparationOptions {
95    fn default() -> Self {
96        Self {
97            #[cfg(feature="lz4")]
98            compression:   CompressionType::Lz4,
99
100            #[cfg(not(feature="lz4"))]
101            compression: CompressionType::None,
102
103            file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
104            separation_threshold: /* 1 KiB */ 1_024,
105
106            staleness_threshold: 0.25,
107            age_cutoff: 0.25,
108        }
109    }
110}
111
112impl KvSeparationOptions {
113    /// Sets the blob compression method.
114    #[must_use]
115    pub fn compression(mut self, compression: CompressionType) -> Self {
116        self.compression = compression;
117        self
118    }
119
120    /// Sets the target size of blob files.
121    ///
122    /// Smaller blob files allow more granular garbage collection
123    /// which allows lower space amp for lower write I/O cost.
124    ///
125    /// Larger blob files decrease the number of files on disk and maintenance
126    /// overhead.
127    ///
128    /// Defaults to 64 MiB.
129    #[must_use]
130    pub fn file_target_size(mut self, bytes: u64) -> Self {
131        self.file_target_size = bytes;
132        self
133    }
134
135    /// Sets the key-value separation threshold in bytes.
136    ///
137    /// Smaller value will reduce compaction overhead and thus write amplification,
138    /// at the cost of lower read performance.
139    ///
140    /// Defaults to 1 KiB.
141    #[must_use]
142    pub fn separation_threshold(mut self, bytes: u32) -> Self {
143        self.separation_threshold = bytes;
144        self
145    }
146
147    /// Sets the staleness threshold percentage.
148    ///
149    /// The staleness percentage determines how much a blob file needs to be fragmented to be
150    /// picked up by the garbage collection.
151    ///
152    /// Defaults to 33%.
153    #[must_use]
154    pub fn staleness_threshold(mut self, ratio: f32) -> Self {
155        self.staleness_threshold = ratio;
156        self
157    }
158
159    /// Sets the age cutoff threshold.
160    ///
161    /// Defaults to 20%.
162    #[must_use]
163    pub fn age_cutoff(mut self, ratio: f32) -> Self {
164        self.age_cutoff = ratio;
165        self
166    }
167}
168
/// Tree configuration builder
///
/// The generic parameter `F` selects the filesystem backend.
/// It defaults to [`StdFs`], so existing code that writes `Config`
/// without a type parameter continues to work unchanged.
pub struct Config<F: Fs = StdFs> {
    /// Folder path
    #[doc(hidden)]
    pub path: PathBuf,

    /// Filesystem backend
    ///
    // All Config fields are `#[doc(hidden)] pub` by convention — callers use
    // builder methods or `..Default::default()`, not struct literals directly.
    // A `with_fs()` builder will be added when call-site refactoring lands.
    #[doc(hidden)]
    pub fs: Arc<F>,

    /// Block cache to use
    ///
    /// Defaults to a 16 MiB cache per tree; see [`Config::use_cache`]
    /// to share one cache across trees.
    #[doc(hidden)]
    pub cache: Arc<Cache>,

    /// Descriptor table to use
    #[doc(hidden)]
    pub descriptor_table: Option<Arc<DescriptorTable>>,

    /// Number of levels of the LSM tree (depth of tree)
    ///
    /// Once set, the level count is fixed (in the "manifest" file)
    pub level_count: u8,

    /// What type of compression is used for data blocks
    pub data_block_compression_policy: CompressionPolicy,

    /// What type of compression is used for index blocks
    pub index_block_compression_policy: CompressionPolicy,

    /// Restart interval inside data blocks
    pub data_block_restart_interval_policy: RestartIntervalPolicy,

    /// Restart interval inside index blocks
    pub index_block_restart_interval_policy: RestartIntervalPolicy,

    /// Block size of data blocks
    pub data_block_size_policy: BlockSizePolicy,

    /// Whether to pin index blocks
    pub index_block_pinning_policy: PinningPolicy,

    /// Whether to pin filter blocks
    pub filter_block_pinning_policy: PinningPolicy,

    /// Whether to pin top level index of partitioned index
    pub top_level_index_block_pinning_policy: PinningPolicy,

    /// Whether to pin top level index of partitioned filter
    pub top_level_filter_block_pinning_policy: PinningPolicy,

    /// Data block hash ratio
    pub data_block_hash_ratio_policy: HashRatioPolicy,

    /// Whether to partition index blocks
    pub index_block_partitioning_policy: PartitioningPolicy,

    /// Whether to partition filter blocks
    pub filter_block_partitioning_policy: PartitioningPolicy,

    /// Partition size when using partitioned indexes
    pub index_block_partition_size_policy: BlockSizePolicy,

    /// Partition size when using partitioned filters
    pub filter_block_partition_size_policy: BlockSizePolicy,

    /// If `true`, the last level will not build filters, reducing the filter size of a database
    /// by ~90% typically
    pub(crate) expect_point_read_hits: bool,

    /// Filter construction policy
    pub filter_policy: FilterPolicy,

    /// Compaction filter factory
    pub compaction_filter_factory: Option<Arc<dyn Factory>>,

    /// Prefix extractor for prefix bloom filters.
    ///
    /// When set, the bloom filter indexes extracted prefixes in addition to
    /// full keys, allowing prefix scans to skip segments that contain no
    /// matching prefixes.
    pub prefix_extractor: Option<Arc<dyn PrefixExtractor>>,

    /// Merge operator for commutative operations
    ///
    /// When set, enables `merge()` operations that store partial updates
    /// which are lazily combined during reads and compaction.
    pub merge_operator: Option<Arc<dyn MergeOperator>>,

    // Key-value separation options; when `Some`, `Config::open` yields an
    // `AnyTree::Blob` instead of a standard tree.
    #[doc(hidden)]
    pub kv_separation_opts: Option<KvSeparationOptions>,

    /// Custom user key comparator.
    ///
    /// When set, all key comparisons use this comparator instead of the
    /// default lexicographic byte ordering. Once a tree is opened with a
    /// comparator, it must always be re-opened with the same comparator.
    // Not `pub` — use `Config::comparator()` builder method as the public API.
    #[doc(hidden)]
    pub(crate) comparator: SharedComparator,

    /// Block-level encryption provider for encryption at rest.
    ///
    /// When set, all blocks (data, index, filter, meta) are encrypted
    /// using this provider after compression and before checksumming.
    pub(crate) encryption: Option<Arc<dyn EncryptionProvider>>,

    /// Pre-trained zstd dictionary for dictionary compression.
    ///
    /// When set together with a [`CompressionType::ZstdDict`] compression
    /// policy, data blocks are compressed using this dictionary. The
    /// dictionary must remain the same for the lifetime of the tree —
    /// opening a tree with a different dictionary will produce
    /// [`Error::ZstdDictMismatch`](crate::Error::ZstdDictMismatch) errors.
    #[cfg(feature = "zstd")]
    pub(crate) zstd_dictionary: Option<Arc<crate::compression::ZstdDictionary>>,

    /// The global sequence number generator.
    ///
    /// Should be shared between multiple trees of a database.
    pub(crate) seqno: SharedSequenceNumberGenerator,

    /// Sequence number watermark that is visible to readers.
    ///
    /// Used for MVCC snapshots and to control which updates are
    /// observable in a given view of the database.
    pub(crate) visible_seqno: SharedSequenceNumberGenerator,
}
304
305// TODO: remove default?
306impl Default for Config {
307    fn default() -> Self {
308        Self {
309            path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)),
310            fs: Arc::new(StdFs),
311            descriptor_table: Some(Arc::new(DescriptorTable::new(256))),
312            seqno: SharedSequenceNumberGenerator::from(SequenceNumberCounter::default()),
313            visible_seqno: SharedSequenceNumberGenerator::from(SequenceNumberCounter::default()),
314
315            cache: Arc::new(Cache::with_capacity_bytes(
316                /* 16 MiB */ 16 * 1_024 * 1_024,
317            )),
318
319            data_block_restart_interval_policy: RestartIntervalPolicy::all(16),
320            index_block_restart_interval_policy: RestartIntervalPolicy::all(1),
321
322            level_count: DEFAULT_LEVEL_COUNT,
323
324            data_block_size_policy: BlockSizePolicy::all(4_096),
325
326            index_block_pinning_policy: PinningPolicy::new([true, true, false]),
327            filter_block_pinning_policy: PinningPolicy::new([true, false]),
328
329            top_level_index_block_pinning_policy: PinningPolicy::all(true), // TODO: implement
330            top_level_filter_block_pinning_policy: PinningPolicy::all(true), // TODO: implement
331
332            index_block_partitioning_policy: PinningPolicy::new([false, false, false, true]),
333            filter_block_partitioning_policy: PinningPolicy::new([false, false, false, true]),
334
335            index_block_partition_size_policy: BlockSizePolicy::all(4_096), // TODO: implement
336            filter_block_partition_size_policy: BlockSizePolicy::all(4_096), // TODO: implement
337
338            data_block_compression_policy: ({
339                #[cfg(feature = "lz4")]
340                let c = CompressionPolicy::new([CompressionType::None, CompressionType::Lz4]);
341
342                #[cfg(not(feature = "lz4"))]
343                let c = CompressionPolicy::new([CompressionType::None]);
344
345                c
346            }),
347            index_block_compression_policy: CompressionPolicy::all(CompressionType::None),
348
349            data_block_hash_ratio_policy: HashRatioPolicy::all(0.0),
350
351            filter_policy: FilterPolicy::all(FilterPolicyEntry::Bloom(
352                BloomConstructionPolicy::BitsPerKey(10.0),
353            )),
354
355            compaction_filter_factory: None,
356            merge_operator: None,
357
358            prefix_extractor: None,
359
360            expect_point_read_hits: false,
361
362            kv_separation_opts: None,
363
364            #[cfg(feature = "zstd")]
365            zstd_dictionary: None,
366
367            comparator: comparator::default_comparator(),
368            encryption: None,
369        }
370    }
371}
372
373impl Config {
374    /// Initializes a new config
375    pub fn new<P: AsRef<Path>>(
376        path: P,
377        seqno: SequenceNumberCounter,
378        visible_seqno: SequenceNumberCounter,
379    ) -> Self {
380        Self {
381            path: absolute_path(path.as_ref()),
382            seqno: Arc::new(seqno),
383            visible_seqno: Arc::new(visible_seqno),
384            ..Default::default()
385        }
386    }
387
388    /// Opens a tree using the config.
389    ///
390    /// # Errors
391    ///
392    /// Will return `Err` if an IO error occurs.
393    /// Returns [`Error::ZstdDictMismatch`](crate::Error::ZstdDictMismatch) if
394    /// the compression policy references a `dict_id` that doesn't match the
395    /// configured dictionary.
396    pub fn open(self) -> crate::Result<AnyTree> {
397        #[cfg(feature = "zstd")]
398        self.validate_zstd_dictionary()?;
399
400        Ok(if self.kv_separation_opts.is_some() {
401            AnyTree::Blob(BlobTree::open(self)?)
402        } else {
403            AnyTree::Standard(Tree::open(self)?)
404        })
405    }
406
407    /// Validates that every `ZstdDict` entry in compression policies references
408    /// a `dict_id` that matches the configured dictionary. Catches mismatches
409    /// at open time rather than at first block write/read.
410    #[cfg(feature = "zstd")]
411    fn validate_zstd_dictionary(&self) -> crate::Result<()> {
412        let dict_id = self.zstd_dictionary.as_ref().map(|d| d.id());
413
414        // NOTE: Only data block policies are validated. Index blocks never
415        // carry a dictionary — Writer::use_index_block_compression() downgrades
416        // ZstdDict to plain Zstd. Validating index policies here would reject
417        // configs that use ZstdDict solely for index blocks even though the
418        // writer handles them correctly.
419        for ct in self.data_block_compression_policy.iter() {
420            if let &CompressionType::ZstdDict {
421                dict_id: required, ..
422            } = ct
423            {
424                match dict_id {
425                    None => {
426                        return Err(crate::Error::ZstdDictMismatch {
427                            expected: required,
428                            got: None,
429                        });
430                    }
431                    Some(actual) if actual != required => {
432                        return Err(crate::Error::ZstdDictMismatch {
433                            expected: required,
434                            got: Some(actual),
435                        });
436                    }
437                    _ => {}
438                }
439            }
440        }
441
442        // Blob files don't support dictionary compression — reject early.
443        if let Some(ref kv_opts) = self.kv_separation_opts {
444            if matches!(kv_opts.compression, CompressionType::ZstdDict { .. }) {
445                return Err(crate::Error::Io(std::io::Error::new(
446                    std::io::ErrorKind::Unsupported,
447                    "zstd dictionary compression is not supported for blob files",
448                )));
449            }
450        }
451
452        Ok(())
453    }
454
455    /// Like [`Config::new`], but accepts pre-built shared generators.
456    ///
457    /// This is useful when the caller already has
458    /// [`SharedSequenceNumberGenerator`] instances (e.g., from a higher-level
459    /// database that shares generators across multiple trees).
460    pub fn new_with_generators<P: AsRef<Path>>(
461        path: P,
462        seqno: SharedSequenceNumberGenerator,
463        visible_seqno: SharedSequenceNumberGenerator,
464    ) -> Self {
465        Self {
466            path: absolute_path(path.as_ref()),
467            seqno,
468            visible_seqno,
469            ..Default::default()
470        }
471    }
472}
473
474impl<F: Fs> Config<F> {
475    /// Overrides the sequence number generator.
476    ///
477    /// By default, [`SequenceNumberCounter`] is used. This allows plugging in
478    /// a custom generator (e.g., HLC for distributed databases).
479    #[must_use]
480    pub fn seqno_generator(mut self, generator: SharedSequenceNumberGenerator) -> Self {
481        self.seqno = generator;
482        self
483    }
484
485    /// Overrides the visible sequence number generator.
486    #[must_use]
487    pub fn visible_seqno_generator(mut self, generator: SharedSequenceNumberGenerator) -> Self {
488        self.visible_seqno = generator;
489        self
490    }
491
492    /// Sets the global cache.
493    ///
494    /// You can create a global [`Cache`] and share it between multiple
495    /// trees to cap global cache memory usage.
496    ///
497    /// Defaults to a cache with 16 MiB of capacity *per tree*.
498    #[must_use]
499    pub fn use_cache(mut self, cache: Arc<Cache>) -> Self {
500        self.cache = cache;
501        self
502    }
503
504    /// Sets the file descriptor cache.
505    ///
506    /// Can be shared across trees.
507    #[must_use]
508    pub fn use_descriptor_table(mut self, descriptor_table: Option<Arc<DescriptorTable>>) -> Self {
509        self.descriptor_table = descriptor_table;
510        self
511    }
512
513    /// If `true`, the last level will not build filters, reducing the filter size of a database
514    /// by ~90% typically.
515    ///
516    /// **Enable this only if you know that point reads generally are expected to find a key-value pair.**
517    #[must_use]
518    pub fn expect_point_read_hits(mut self, b: bool) -> Self {
519        self.expect_point_read_hits = b;
520        self
521    }
522
523    /// Sets the partitioning policy for filter blocks.
524    #[must_use]
525    pub fn filter_block_partitioning_policy(mut self, policy: PinningPolicy) -> Self {
526        self.filter_block_partitioning_policy = policy;
527        self
528    }
529
530    /// Sets the partitioning policy for index blocks.
531    #[must_use]
532    pub fn index_block_partitioning_policy(mut self, policy: PinningPolicy) -> Self {
533        self.index_block_partitioning_policy = policy;
534        self
535    }
536
537    /// Sets the pinning policy for filter blocks.
538    #[must_use]
539    pub fn filter_block_pinning_policy(mut self, policy: PinningPolicy) -> Self {
540        self.filter_block_pinning_policy = policy;
541        self
542    }
543
544    /// Sets the pinning policy for index blocks.
545    #[must_use]
546    pub fn index_block_pinning_policy(mut self, policy: PinningPolicy) -> Self {
547        self.index_block_pinning_policy = policy;
548        self
549    }
550
551    /// Sets the restart interval inside data blocks.
552    ///
553    /// A higher restart interval saves space while increasing lookup times
554    /// inside data blocks.
555    ///
556    /// Default = 16
557    #[must_use]
558    pub fn data_block_restart_interval_policy(mut self, policy: RestartIntervalPolicy) -> Self {
559        self.data_block_restart_interval_policy = policy;
560        self
561    }
562
563    // TODO: not supported yet in index blocks
564    // /// Sets the restart interval inside index blocks.
565    // ///
566    // /// A higher restart interval saves space while increasing lookup times
567    // /// inside index blocks.
568    // ///
569    // /// Default = 1
570    // #[must_use]
571    // pub fn index_block_restart_interval_policy(mut self, policy: RestartIntervalPolicy) -> Self {
572    //     self.index_block_restart_interval_policy = policy;
573    //     self
574    // }
575
576    /// Sets the filter construction policy.
577    #[must_use]
578    pub fn filter_policy(mut self, policy: FilterPolicy) -> Self {
579        self.filter_policy = policy;
580        self
581    }
582
583    /// Sets the compression method for data blocks.
584    #[must_use]
585    pub fn data_block_compression_policy(mut self, policy: CompressionPolicy) -> Self {
586        self.data_block_compression_policy = policy;
587        self
588    }
589
590    /// Sets the compression method for index blocks.
591    #[must_use]
592    pub fn index_block_compression_policy(mut self, policy: CompressionPolicy) -> Self {
593        self.index_block_compression_policy = policy;
594        self
595    }
596
597    // TODO: level count is fixed to 7 right now
598    // /// Sets the number of levels of the LSM tree (depth of tree).
599    // ///
600    // /// Defaults to 7, like `LevelDB` and `RocksDB`.
601    // ///
602    // /// Cannot be changed once set.
603    // ///
604    // /// # Panics
605    // ///
606    // /// Panics if `n` is 0.
607    // #[must_use]
608    // pub fn level_count(mut self, n: u8) -> Self {
609    //     assert!(n > 0);
610
611    //     self.level_count = n;
612    //     self
613    // }
614
615    /// Sets the data block size policy.
616    #[must_use]
617    pub fn data_block_size_policy(mut self, policy: BlockSizePolicy) -> Self {
618        self.data_block_size_policy = policy;
619        self
620    }
621
622    /// Sets the hash ratio policy for data blocks.
623    ///
624    /// If greater than 0.0, a hash index is embedded into data blocks that can speed up reads
625    /// inside the data block.
626    #[must_use]
627    pub fn data_block_hash_ratio_policy(mut self, policy: HashRatioPolicy) -> Self {
628        self.data_block_hash_ratio_policy = policy;
629        self
630    }
631
632    /// Toggles key-value separation.
633    #[must_use]
634    pub fn with_kv_separation(mut self, opts: Option<KvSeparationOptions>) -> Self {
635        self.kv_separation_opts = opts;
636        self
637    }
638
639    /// Installs a custom compaction filter.
640    #[must_use]
641    pub fn with_compaction_filter_factory(mut self, factory: Option<Arc<dyn Factory>>) -> Self {
642        self.compaction_filter_factory = factory;
643        self
644    }
645
646    /// Sets the prefix extractor for prefix bloom filters.
647    ///
648    /// When configured, bloom filters will index key prefixes returned by
649    /// the extractor. Prefix scans can then skip segments whose bloom
650    /// filter reports no match for the scan prefix.
651    #[must_use]
652    pub fn prefix_extractor(mut self, extractor: Arc<dyn PrefixExtractor>) -> Self {
653        self.prefix_extractor = Some(extractor);
654        self
655    }
656
657    /// Installs a merge operator for commutative operations.
658    ///
659    /// When set, enables [`crate::AbstractTree::merge`] which stores partial updates
660    /// (operands) that are lazily combined during reads and compaction.
661    #[must_use]
662    pub fn with_merge_operator(mut self, op: Option<Arc<dyn MergeOperator>>) -> Self {
663        self.merge_operator = op;
664        self
665    }
666
667    /// Sets a custom user key comparator.
668    ///
669    /// When configured, all key ordering (memtable, block index, merge,
670    /// range scans) uses this comparator instead of the default lexicographic
671    /// byte ordering.
672    ///
673    /// # Important
674    ///
675    /// The comparator's [`crate::UserComparator::name`] is persisted when a tree is
676    /// first created. On subsequent opens the stored name is compared against
677    /// the supplied comparator's name — a mismatch causes the open to fail
678    /// with [`Error::ComparatorMismatch`](crate::Error::ComparatorMismatch).
679    #[must_use]
680    pub fn comparator(mut self, comparator: SharedComparator) -> Self {
681        self.comparator = comparator;
682        self
683    }
684
685    /// Sets the block-level encryption provider for encryption at rest.
686    ///
687    /// When set, all blocks written to SST files are encrypted after
688    /// compression and before checksumming, using the provided
689    /// [`EncryptionProvider`].
690    ///
691    /// The caller is responsible for key management and rotation.
692    /// See [`crate::Aes256GcmProvider`] (behind the `encryption` feature)
693    /// for a ready-to-use AES-256-GCM implementation.
694    ///
695    /// **Important constraints:**
696    /// - Encryption state is NOT recorded in SST metadata. Opening an
697    ///   encrypted tree without the correct provider (or vice versa) will
698    ///   cause block validation errors, not silent corruption.
699    /// - Blob files (KV-separated large values) are NOT covered by
700    ///   block-level encryption. Large values stored via KV separation
701    ///   remain in plaintext on disk.
702    #[must_use]
703    pub fn with_encryption(mut self, encryption: Option<Arc<dyn EncryptionProvider>>) -> Self {
704        self.encryption = encryption;
705        self
706    }
707
708    /// Sets the pre-trained zstd dictionary for dictionary compression.
709    ///
710    /// When set, data blocks using [`CompressionType::ZstdDict`] will be
711    /// compressed and decompressed with this dictionary. The dictionary
712    /// should be trained on representative data samples for best results.
713    ///
714    /// Create a dictionary with [`ZstdDictionary::new`](crate::ZstdDictionary::new),
715    /// then use [`CompressionType::zstd_dict`] to create a matching
716    /// compression type:
717    ///
718    /// ```ignore
719    /// use lsm_tree::{CompressionType, ZstdDictionary};
720    ///
721    /// let dict = ZstdDictionary::new(&training_data);
722    /// let compression = CompressionType::zstd_dict(3, dict.id()).unwrap();
723    ///
724    /// config
725    ///     .zstd_dictionary(Some(Arc::new(dict)))
726    ///     .data_block_compression_policy(CompressionPolicy::all(compression));
727    /// ```
728    #[cfg(feature = "zstd")]
729    #[must_use]
730    pub fn zstd_dictionary(
731        mut self,
732        dictionary: Option<Arc<crate::compression::ZstdDictionary>>,
733    ) -> Self {
734        self.zstd_dictionary = dictionary;
735        self
736    }
737}