raindb/versioning/
utils.rs

1// Copyright (c) 2021 Google LLC
2//
3// Use of this source code is governed by an MIT-style
4// license that can be found in the LICENSE file or at
5// https://opensource.org/licenses/MIT.
6
7//! Contains utilities used for various versioning operations.
8
9use std::sync::Arc;
10
11use crate::key::InternalKey;
12
13use super::file_metadata::FileMetadata;
14
15/// Sums the file sizes for the specified vector of file metadata.
16pub(crate) fn sum_file_sizes<F: AsRef<FileMetadata>>(files: &[F]) -> u64 {
17    files
18        .iter()
19        .map(|metadata| metadata.as_ref().get_file_size())
20        .sum()
21}
22
23/**
24Binary search a sorted set of disjoint files for a file whose largest key forms a tight upper
25bound on the target key.
26
27Returns the index of the file whose key range creates an upper bound on the target key (i.e.
28its largest key is greater than or equal the target). Otherwise returns `None`.
29
30# Invariants
31
32The passed in `files` **must** be a sorted set and the files must store key ranges that do
33not overlap with the key ranges in any other file.
34
35# Legacy
36
37This is synonomous with LevelDB's `leveldb::FindFile` method.
38*/
39pub(crate) fn find_file_with_upper_bound_range(
40    files: &[Arc<FileMetadata>],
41    target_user_key: &InternalKey,
42) -> Option<usize> {
43    let mut left: usize = 0;
44    let mut right: usize = files.len();
45    while left < right {
46        let mid: usize = (left + right) / 2;
47        let file = &files[mid];
48
49        if file.largest_key() < target_user_key {
50            // The largest key in the file at mid is less than the target, so the set of files
51            // at or before mid are not interesting.
52            left = mid + 1;
53        } else {
54            // The largest key in the file at mid is greater than or equal to the target, so
55            // the set of files after mid are not interesting.
56            right = mid;
57        }
58    }
59
60    if left == files.len() {
61        return None;
62    }
63
64    Some(left)
65}
66
67#[cfg(test)]
68mod find_file_with_upper_bound_range_tests {
69    use pretty_assertions::assert_eq;
70
71    use crate::Operation;
72
73    use super::*;
74
75    #[test]
76    fn returns_nothing_for_an_empty_list() {
77        let files: Vec<Arc<FileMetadata>> = vec![];
78        let actual = find_file_with_upper_bound_range(
79            &files,
80            &InternalKey::new(b"bobo".to_vec(), 30, Operation::Put),
81        );
82
83        assert_eq!(actual, None);
84    }
85
86    #[test]
87    fn when_there_is_only_one_file_returns_the_index_of_the_file_if_it_exists() {
88        let mut files: Vec<Arc<FileMetadata>> = vec![];
89        insert_files_with_largest_keys(&mut files, vec![b"batmann".to_vec()]);
90
91        assert_eq!(
92            find_file_with_upper_bound_range(
93                &files,
94                &InternalKey::new(b"batmann".to_vec(), 30, Operation::Put),
95            )
96            .unwrap(),
97            0
98        );
99        assert_eq!(
100            find_file_with_upper_bound_range(
101                &files,
102                &InternalKey::new(b"bat".to_vec(), 30, Operation::Put),
103            )
104            .unwrap(),
105            0,
106            "The target is not a boundary but is included in a file's key range so return the \
107            index of that file."
108        );
109        assert_eq!(
110            find_file_with_upper_bound_range(
111                &files,
112                &InternalKey::new(b"bb".to_vec(), 30, Operation::Put),
113            ),
114            None,
115            "The target key is larger than every other key so we return None since no file's range \
116            bounds the target from above."
117        );
118    }
119
120    #[test]
121    fn when_there_are_multiple_files_returns_the_index_of_the_file_if_it_exists() {
122        let mut files: Vec<Arc<FileMetadata>> = vec![];
123        insert_files_with_largest_keys(
124            &mut files,
125            vec![
126                b"batmann".to_vec(),
127                b"blue".to_vec(),
128                b"bobo".to_vec(),
129                b"patty".to_vec(),
130                b"robin".to_vec(),
131            ],
132        );
133
134        assert_eq!(
135            find_file_with_upper_bound_range(&files, &create_testing_key(b"apple".to_vec()))
136                .unwrap(),
137            0,
138            "The target is not a boundary but is included in a file's key range so return the \
139            index of that file."
140        );
141        assert_eq!(
142            find_file_with_upper_bound_range(
143                &files,
144                &InternalKey::new(b"bat".to_vec(), 30, Operation::Put),
145            )
146            .unwrap(),
147            0,
148            "The target is not a boundary but is included in a file's key range so return the \
149            index of that file."
150        );
151        assert_eq!(
152            find_file_with_upper_bound_range(
153                &files,
154                &InternalKey::new(b"batmann".to_vec(), 30, Operation::Put),
155            )
156            .unwrap(),
157            0
158        );
159
160        assert_eq!(
161            find_file_with_upper_bound_range(&files, &create_testing_key(b"blue".to_vec()))
162                .unwrap(),
163            1
164        );
165        assert_eq!(
166            find_file_with_upper_bound_range(
167                &files,
168                &InternalKey::new(b"bb".to_vec(), 30, Operation::Put),
169            )
170            .unwrap(),
171            1,
172            "The target is not a boundary but is included in a file's key range so return the \
173            index of that file."
174        );
175
176        assert_eq!(
177            find_file_with_upper_bound_range(&files, &create_testing_key(b"bobo".to_vec()))
178                .unwrap(),
179            2
180        );
181        assert_eq!(
182            find_file_with_upper_bound_range(&files, &create_testing_key(b"bm".to_vec())).unwrap(),
183            2,
184            "The target is not a boundary but is included in a file's key range so return the \
185            index of that file."
186        );
187
188        assert_eq!(
189            find_file_with_upper_bound_range(&files, &create_testing_key(b"patty".to_vec()))
190                .unwrap(),
191            3
192        );
193        assert_eq!(
194            find_file_with_upper_bound_range(&files, &create_testing_key(b"robin".to_vec()))
195                .unwrap(),
196            4
197        );
198
199        assert_eq!(
200            find_file_with_upper_bound_range(&files, &create_testing_key(b"s".to_vec())),
201            None,
202            "The target key is larger than every other key so we return None since no file's range \
203            bounds the target from above."
204        );
205    }
206
207    /**
208    Add files with the largest key set to [`InternalKey`]'s with the specified user keys to the
209    provided metadata vector.
210    */
211    fn insert_files_with_largest_keys(files: &mut Vec<Arc<FileMetadata>>, user_keys: Vec<Vec<u8>>) {
212        for user_key in user_keys.into_iter() {
213            let mut file = FileMetadata::new(30);
214            file.set_largest_key(Some(create_testing_key(user_key)));
215            files.push(Arc::new(file));
216        }
217    }
218
219    /// Create an [`InternalKey`] based on the provided user key for testing.
220    fn create_testing_key(user_key: Vec<u8>) -> InternalKey {
221        InternalKey::new(user_key, 30, Operation::Put)
222    }
223}
224
225#[cfg(test)]
226mod sum_file_sizes_tests {
227    use pretty_assertions::assert_eq;
228
229    use super::*;
230
231    #[test]
232    fn can_sum_objects_convertible_to_a_file_metadata_reference() {
233        let mut files: Vec<Arc<FileMetadata>> = vec![];
234        assert_eq!(sum_file_sizes(&files), 0);
235
236        for idx in 0..5 {
237            let mut file = FileMetadata::new(idx + 30);
238            file.set_file_size(10);
239            files.push(Arc::new(file));
240        }
241
242        assert_eq!(sum_file_sizes(&files), 50);
243    }
244}