rusty_leveldb/
cmp.rs

1use crate::key_types::{self, LookupKey};
2use crate::types;
3
4use std::cmp::Ordering;
5use std::rc::Rc;
6
7type WrappedCmp = Rc<Box<dyn Cmp>>;
8
9/// Comparator trait, supporting types that can be nested (i.e., add additional functionality on
10/// top of an inner comparator)
11pub trait Cmp {
12    /// Compare to byte strings, bytewise.
13    fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering;
14
15    /// Return the shortest byte string that compares "Greater" to the first argument and "Less" to
16    /// the second one.
17    fn find_shortest_sep(&self, from: &[u8], to: &[u8]) -> Vec<u8>;
18    /// Return the shortest byte string that compares "Greater" to the argument.
19    fn find_short_succ(&self, key: &[u8]) -> Vec<u8>;
20
21    /// A unique identifier for a comparator. A comparator wrapper (like InternalKeyCmp) may
22    /// return the id of its inner comparator.
23    fn id(&self) -> &'static str;
24}
25
26/// The default byte-wise comparator.
27#[derive(Clone)]
28pub struct DefaultCmp;
29
30impl Cmp for DefaultCmp {
31    fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering {
32        a.cmp(b)
33    }
34
35    fn id(&self) -> &'static str {
36        "leveldb.BytewiseComparator"
37    }
38
39    fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> {
40        if a == b {
41            return a.to_vec();
42        }
43
44        let min = if a.len() < b.len() { a.len() } else { b.len() };
45        let mut diff_at = 0;
46
47        while diff_at < min && a[diff_at] == b[diff_at] {
48            diff_at += 1;
49        }
50
51        // First, try to find a short separator. If that fails, try a backup mechanism below.
52        while diff_at < min {
53            let diff = a[diff_at];
54            if diff < 0xff && diff + 1 < b[diff_at] {
55                let mut sep = Vec::from(&a[0..diff_at + 1]);
56                sep[diff_at] += 1;
57                assert!(self.cmp(&sep, b) == Ordering::Less);
58                return sep;
59            }
60
61            diff_at += 1;
62        }
63
64        let mut sep = Vec::with_capacity(a.len() + 1);
65        sep.extend_from_slice(a);
66        // Try increasing a and check if it's still smaller than b. First find the last byte
67        // smaller than 0xff, and then increment that byte. Only if the separator is lesser than b,
68        // return it.
69        let mut i = a.len() - 1;
70        while i > 0 && sep[i] == 0xff {
71            i -= 1;
72        }
73        if sep[i] < 0xff {
74            sep[i] += 1;
75            if self.cmp(&sep, b) == Ordering::Less {
76                return sep;
77            } else {
78                sep[i] -= 1;
79            }
80        }
81
82        // Backup case: either `a` is full of 0xff, or all different places are less than 2
83        // characters apart.
84        // The result is not necessarily short, but a good separator: e.g., "abc" vs "abd" ->
85        // "abc\0", which is greater than abc and lesser than abd.
86        // Append a 0 byte; by making it longer than a, it will compare greater to it.
87        sep.extend_from_slice(&[0]);
88        sep
89    }
90
91    fn find_short_succ(&self, a: &[u8]) -> Vec<u8> {
92        let mut result = a.to_vec();
93        for i in 0..a.len() {
94            if a[i] != 0xff {
95                result[i] += 1;
96                result.resize(i + 1, 0);
97                return result;
98            }
99        }
100        // Rare path
101        result.push(255);
102        result
103    }
104}
105
106/// Same as memtable_key_cmp, but for InternalKeys.
107#[derive(Clone)]
108pub struct InternalKeyCmp(pub Rc<Box<dyn Cmp>>);
109
110impl Cmp for InternalKeyCmp {
111    fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering {
112        key_types::cmp_internal_key(self.0.as_ref().as_ref(), a, b)
113    }
114
115    fn id(&self) -> &'static str {
116        self.0.id()
117    }
118
119    fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> {
120        if a == b {
121            return a.to_vec();
122        }
123
124        let (_, seqa, keya) = key_types::parse_internal_key(a);
125        let (_, _, keyb) = key_types::parse_internal_key(b);
126
127        let sep: Vec<u8> = self.0.find_shortest_sep(keya, keyb);
128
129        if sep.len() < keya.len() && self.0.cmp(keya, &sep) == Ordering::Less {
130            return LookupKey::new(&sep, types::MAX_SEQUENCE_NUMBER)
131                .internal_key()
132                .to_vec();
133        }
134        LookupKey::new(&sep, seqa).internal_key().to_vec()
135    }
136
137    fn find_short_succ(&self, a: &[u8]) -> Vec<u8> {
138        let (_, seq, key) = key_types::parse_internal_key(a);
139        let succ: Vec<u8> = self.0.find_short_succ(key);
140        LookupKey::new(&succ, seq).internal_key().to_vec()
141    }
142}
143
144impl InternalKeyCmp {
145    /// cmp_inner compares a and b using the underlying comparator (the "user comparator").
146    pub fn cmp_inner(&self, a: &[u8], b: &[u8]) -> Ordering {
147        self.0.cmp(a, b)
148    }
149}
150
151/// An internal comparator wrapping a user-supplied comparator. This comparator is used to compare
152/// memtable keys, which contain length prefixes and a sequence number.
153/// The ordering is determined by asking the wrapped comparator; ties are broken by *reverse*
154/// ordering the sequence numbers. (This means that when having an entry abx/4 and seRching for
155/// abx/5, then abx/4 is counted as "greater-or-equal", making snapshot functionality work at all)
156#[derive(Clone)]
157pub struct MemtableKeyCmp(pub Rc<Box<dyn Cmp>>);
158
159impl Cmp for MemtableKeyCmp {
160    fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering {
161        key_types::cmp_memtable_key(self.0.as_ref().as_ref(), a, b)
162    }
163
164    fn id(&self) -> &'static str {
165        self.0.id()
166    }
167
168    // The following two impls should not be used (by principle) although they should be correct.
169    // They will crash the program.
170    fn find_shortest_sep(&self, _: &[u8], _: &[u8]) -> Vec<u8> {
171        panic!("find* functions are invalid on MemtableKeyCmp");
172    }
173
174    fn find_short_succ(&self, _: &[u8]) -> Vec<u8> {
175        panic!("find* functions are invalid on MemtableKeyCmp");
176    }
177}
178
179#[cfg(test)]
180mod tests {
181    use super::*;
182    use key_types::LookupKey;
183
184    #[test]
185    fn test_cmp_defaultcmp_shortest_sep() {
186        assert_eq!(
187            DefaultCmp.find_shortest_sep("abcd".as_bytes(), "abcf".as_bytes()),
188            "abce".as_bytes()
189        );
190        assert_eq!(
191            DefaultCmp.find_shortest_sep("abc".as_bytes(), "acd".as_bytes()),
192            "abd".as_bytes()
193        );
194        assert_eq!(
195            DefaultCmp.find_shortest_sep("abcdefghi".as_bytes(), "abcffghi".as_bytes()),
196            "abce".as_bytes()
197        );
198        assert_eq!(
199            DefaultCmp.find_shortest_sep("a".as_bytes(), "a".as_bytes()),
200            "a".as_bytes()
201        );
202        assert_eq!(
203            DefaultCmp.find_shortest_sep("a".as_bytes(), "b".as_bytes()),
204            "a\0".as_bytes()
205        );
206        assert_eq!(
207            DefaultCmp.find_shortest_sep("abc".as_bytes(), "zzz".as_bytes()),
208            "b".as_bytes()
209        );
210        assert_eq!(
211            DefaultCmp.find_shortest_sep("yyy".as_bytes(), "z".as_bytes()),
212            "yyz".as_bytes()
213        );
214        assert_eq!(
215            DefaultCmp.find_shortest_sep("".as_bytes(), "".as_bytes()),
216            "".as_bytes()
217        );
218    }
219
220    #[test]
221    fn test_cmp_defaultcmp_short_succ() {
222        assert_eq!(
223            DefaultCmp.find_short_succ("abcd".as_bytes()),
224            "b".as_bytes()
225        );
226        assert_eq!(
227            DefaultCmp.find_short_succ("zzzz".as_bytes()),
228            "{".as_bytes()
229        );
230        assert_eq!(DefaultCmp.find_short_succ(&[]), &[0xff]);
231        assert_eq!(
232            DefaultCmp.find_short_succ(&[0xff, 0xff, 0xff]),
233            &[0xff, 0xff, 0xff, 0xff]
234        );
235    }
236
237    #[test]
238    fn test_cmp_internalkeycmp_shortest_sep() {
239        let cmp = InternalKeyCmp(Rc::new(Box::new(DefaultCmp)));
240        assert_eq!(
241            cmp.find_shortest_sep(
242                LookupKey::new("abcd".as_bytes(), 1).internal_key(),
243                LookupKey::new("abcf".as_bytes(), 2).internal_key()
244            ),
245            LookupKey::new("abce".as_bytes(), 1).internal_key()
246        );
247        assert_eq!(
248            cmp.find_shortest_sep(
249                LookupKey::new("abcd".as_bytes(), 1).internal_key(),
250                LookupKey::new("abce".as_bytes(), 2).internal_key()
251            ),
252            LookupKey::new("abcd\0".as_bytes(), 1).internal_key()
253        );
254        assert_eq!(
255            cmp.find_shortest_sep(
256                LookupKey::new("abc".as_bytes(), 1).internal_key(),
257                LookupKey::new("zzz".as_bytes(), 2).internal_key()
258            ),
259            LookupKey::new("b".as_bytes(), types::MAX_SEQUENCE_NUMBER).internal_key()
260        );
261        assert_eq!(
262            cmp.find_shortest_sep(
263                LookupKey::new("abc".as_bytes(), 1).internal_key(),
264                LookupKey::new("acd".as_bytes(), 2).internal_key()
265            ),
266            LookupKey::new("abd".as_bytes(), 1).internal_key()
267        );
268        assert_eq!(
269            cmp.find_shortest_sep(
270                LookupKey::new("abc".as_bytes(), 1).internal_key(),
271                LookupKey::new("abe".as_bytes(), 2).internal_key()
272            ),
273            LookupKey::new("abd".as_bytes(), 1).internal_key()
274        );
275        assert_eq!(
276            cmp.find_shortest_sep(
277                LookupKey::new("".as_bytes(), 1).internal_key(),
278                LookupKey::new("".as_bytes(), 2).internal_key()
279            ),
280            LookupKey::new("".as_bytes(), 1).internal_key()
281        );
282        assert_eq!(
283            cmp.find_shortest_sep(
284                LookupKey::new("abc".as_bytes(), 2).internal_key(),
285                LookupKey::new("abc".as_bytes(), 2).internal_key()
286            ),
287            LookupKey::new("abc".as_bytes(), 2).internal_key()
288        );
289    }
290
291    #[test]
292    fn test_cmp_internalkeycmp() {
293        let cmp = InternalKeyCmp(Rc::new(Box::new(DefaultCmp)));
294        // a < b < c
295        let a = LookupKey::new("abc".as_bytes(), 2).internal_key().to_vec();
296        let b = LookupKey::new("abc".as_bytes(), 1).internal_key().to_vec();
297        let c = LookupKey::new("abd".as_bytes(), 3).internal_key().to_vec();
298        let d = "xyy".as_bytes();
299        let e = "xyz".as_bytes();
300
301        assert_eq!(Ordering::Less, cmp.cmp(&a, &b));
302        assert_eq!(Ordering::Equal, cmp.cmp(&a, &a));
303        assert_eq!(Ordering::Greater, cmp.cmp(&b, &a));
304        assert_eq!(Ordering::Less, cmp.cmp(&a, &c));
305        assert_eq!(Ordering::Less, cmp.cmp_inner(d, e));
306        assert_eq!(Ordering::Greater, cmp.cmp_inner(e, d));
307    }
308
309    #[test]
310    #[should_panic]
311    fn test_cmp_memtablekeycmp_panics() {
312        let cmp = MemtableKeyCmp(Rc::new(Box::new(DefaultCmp)));
313        cmp.cmp(&[1, 2, 3], &[4, 5, 6]);
314    }
315}