Skip to main content

rouchdb_core/
collation.rs

1/// CouchDB collation order.
2///
3/// CouchDB (and PouchDB) use an Erlang-derived ordering for keys:
4///
5/// ```text
6/// null < boolean < number < string < array < object
7/// ```
8///
9/// This module provides comparison and encoding functions that match this
10/// ordering, ensuring consistent behavior across local storage and remote
11/// CouchDB instances.
12use serde_json::Value;
13use std::cmp::Ordering;
14
15// ---------------------------------------------------------------------------
16// Type ranking
17// ---------------------------------------------------------------------------
18
19/// Numeric type rank matching CouchDB collation.
20fn type_rank(v: &Value) -> u8 {
21    match v {
22        Value::Null => 1,
23        Value::Bool(_) => 2,
24        Value::Number(_) => 3,
25        Value::String(_) => 4,
26        Value::Array(_) => 5,
27        Value::Object(_) => 6,
28    }
29}
30
31// ---------------------------------------------------------------------------
32// Comparison
33// ---------------------------------------------------------------------------
34
35/// Compare two JSON values using CouchDB collation order.
36pub fn collate(a: &Value, b: &Value) -> Ordering {
37    let rank_a = type_rank(a);
38    let rank_b = type_rank(b);
39
40    if rank_a != rank_b {
41        return rank_a.cmp(&rank_b);
42    }
43
44    match (a, b) {
45        (Value::Null, Value::Null) => Ordering::Equal,
46        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
47        (Value::Number(a), Value::Number(b)) => compare_numbers(a, b),
48        (Value::String(a), Value::String(b)) => a.cmp(b),
49        (Value::Array(a), Value::Array(b)) => {
50            // Element-by-element, shorter arrays sort first
51            for (ea, eb) in a.iter().zip(b.iter()) {
52                match collate(ea, eb) {
53                    Ordering::Equal => continue,
54                    other => return other,
55                }
56            }
57            a.len().cmp(&b.len())
58        }
59        (Value::Object(a), Value::Object(b)) => {
60            // Key-by-key comparison; fewer keys sort first.
61            // Keys are sorted before comparison.
62            let mut keys_a: Vec<&String> = a.keys().collect();
63            let mut keys_b: Vec<&String> = b.keys().collect();
64            keys_a.sort();
65            keys_b.sort();
66
67            for (ka, kb) in keys_a.iter().zip(keys_b.iter()) {
68                match ka.cmp(kb) {
69                    Ordering::Equal => {}
70                    other => return other,
71                }
72                match collate(&a[*ka], &b[*kb]) {
73                    Ordering::Equal => continue,
74                    other => return other,
75                }
76            }
77            a.len().cmp(&b.len())
78        }
79        _ => Ordering::Equal, // Should be unreachable due to rank check
80    }
81}
82
83/// Compare two JSON numbers without losing precision on large integers.
84///
85/// `serde_json::Number` can hold `u64`, `i64`, or `f64`. Converting straight
86/// to `f64` (as a naive implementation does) collapses integers larger than
87/// 2^53 onto the same float, making distinct values compare `Equal` and
88/// breaking Mango `$eq`/range queries. When both operands are integers we
89/// compare them exactly via `i128`; otherwise we fall back to `f64`.
90fn compare_numbers(a: &serde_json::Number, b: &serde_json::Number) -> Ordering {
91    fn as_i128(n: &serde_json::Number) -> Option<i128> {
92        if let Some(i) = n.as_i64() {
93            Some(i as i128)
94        } else {
95            n.as_u64().map(|u| u as i128)
96        }
97    }
98
99    if let (Some(ia), Some(ib)) = (as_i128(a), as_i128(b)) {
100        return ia.cmp(&ib);
101    }
102    let fa = a.as_f64().unwrap_or(0.0);
103    let fb = b.as_f64().unwrap_or(0.0);
104    // total_cmp gives a well-defined ordering for all f64 values including NaN
105    // (which shouldn't appear in JSON but be safe).
106    fa.total_cmp(&fb)
107}
108
109// ---------------------------------------------------------------------------
110// Indexable string encoding
111// ---------------------------------------------------------------------------
112
113/// Encode a JSON value into a string that sorts lexicographically in CouchDB
114/// collation order. Used as keys in the storage engine.
115///
116/// Format:
117/// - Null:    `"1"`
118/// - Bool:    `"2F"` / `"2T"`
119/// - Number:  `"3"` + encoded number
120/// - String:  `"4"` + string value
121/// - Array:   `"5"` + encoded elements separated by null byte
122/// - Object:  `"6"` + encoded key-value pairs
123pub fn to_indexable_string(v: &Value) -> String {
124    let mut s = String::new();
125    encode_value(v, &mut s);
126    s
127}
128
129fn encode_value(v: &Value, out: &mut String) {
130    match v {
131        Value::Null => out.push('1'),
132        Value::Bool(b) => {
133            out.push('2');
134            out.push(if *b { 'T' } else { 'F' });
135        }
136        Value::Number(n) => {
137            out.push('3');
138            encode_number(n.as_f64().unwrap_or(0.0), out);
139        }
140        Value::String(s) => {
141            out.push('4');
142            out.push_str(s);
143        }
144        Value::Array(arr) => {
145            out.push('5');
146            for (i, elem) in arr.iter().enumerate() {
147                if i > 0 {
148                    out.push('\0');
149                }
150                encode_value(elem, out);
151            }
152        }
153        Value::Object(obj) => {
154            out.push('6');
155            let mut keys: Vec<&String> = obj.keys().collect();
156            keys.sort();
157            for (i, key) in keys.iter().enumerate() {
158                if i > 0 {
159                    out.push('\0');
160                }
161                out.push_str(key);
162                out.push('\0');
163                encode_value(&obj[*key], out);
164            }
165        }
166    }
167}
168
169/// Encode a number such that the resulting string sorts lexicographically
170/// in numeric order.
171///
172/// Scheme (matching PouchDB's `numToIndexableString`):
173/// - Negative numbers: `0` + inverted representation
174/// - Zero: `1`
175/// - Positive numbers: `2` + magnitude (zero-padded to 3 digits) + mantissa
176fn encode_number(n: f64, out: &mut String) {
177    if n.is_nan() {
178        out.push('0'); // Sort NaN before all real numbers
179        return;
180    }
181    if n == f64::NEG_INFINITY {
182        out.push('0');
183        out.push_str("00000");
184        return;
185    }
186    if n == f64::INFINITY {
187        out.push('2');
188        out.push_str("99999");
189        return;
190    }
191    if n == 0.0 {
192        out.push('1');
193        return;
194    }
195
196    let is_negative = n < 0.0;
197    let abs_n = n.abs();
198
199    // Represent as: mantissa * 10^exponent
200    // where 1 <= mantissa < 10
201    let exponent = abs_n.log10().floor() as i64;
202    let mantissa = abs_n / 10f64.powi(exponent as i32);
203
204    // We offset the exponent by 10000 so it's always positive and
205    // zero-pad to 5 digits for consistent lexicographic ordering.
206    if is_negative {
207        // For negatives: invert so larger (closer to zero) sorts first
208        // Prefix with '0', then (10000 - exponent), then (10 - mantissa)
209        out.push('0');
210        let inv_exp = 10000 - exponent;
211        out.push_str(&format!("{:0>5}", inv_exp));
212        let inv_mantissa = 10.0 - mantissa;
213        let m_str = format!("{:.10}", inv_mantissa);
214        out.push_str(m_str.trim_end_matches('0'));
215    } else {
216        // For positives: prefix with '2', then (exponent + 10000), then mantissa
217        out.push('2');
218        let adj_exp = exponent + 10000;
219        out.push_str(&format!("{:0>5}", adj_exp));
220        let m_str = format!("{:.10}", mantissa);
221        out.push_str(m_str.trim_end_matches('0'));
222    }
223}
224
225// ---------------------------------------------------------------------------
226// Tests
227// ---------------------------------------------------------------------------
228
229#[cfg(test)]
230mod tests {
231    use super::*;
232    use serde_json::json;
233
234    #[test]
235    fn type_ordering() {
236        let values = vec![
237            json!(null),
238            json!(false),
239            json!(true),
240            json!(-1),
241            json!(0),
242            json!(1),
243            json!(1.5),
244            json!(""),
245            json!("a"),
246            json!("b"),
247            json!([]),
248            json!([1]),
249            json!({}),
250            json!({"a": 1}),
251        ];
252
253        for i in 0..values.len() {
254            for j in (i + 1)..values.len() {
255                assert_eq!(
256                    collate(&values[i], &values[j]),
257                    Ordering::Less,
258                    "{:?} should be less than {:?}",
259                    values[i],
260                    values[j]
261                );
262            }
263        }
264    }
265
266    #[test]
267    fn null_equality() {
268        assert_eq!(collate(&json!(null), &json!(null)), Ordering::Equal);
269    }
270
271    #[test]
272    fn bool_ordering() {
273        assert_eq!(collate(&json!(false), &json!(true)), Ordering::Less);
274        assert_eq!(collate(&json!(true), &json!(true)), Ordering::Equal);
275    }
276
277    #[test]
278    fn number_ordering() {
279        assert_eq!(collate(&json!(-100), &json!(-1)), Ordering::Less);
280        assert_eq!(collate(&json!(0), &json!(1)), Ordering::Less);
281        assert_eq!(collate(&json!(1), &json!(2)), Ordering::Less);
282        assert_eq!(collate(&json!(1.5), &json!(2)), Ordering::Less);
283    }
284
285    #[test]
286    fn large_integer_precision() {
287        // Distinct integers beyond f64's 2^53 exact range must not collapse
288        // to Equal (regression: naive as_f64 comparison loses precision).
289        let a = json!(9_007_199_254_740_992_i64); // 2^53
290        let b = json!(9_007_199_254_740_993_i64); // 2^53 + 1
291        assert_eq!(collate(&a, &b), Ordering::Less);
292        assert_eq!(collate(&b, &a), Ordering::Greater);
293        assert_eq!(collate(&a, &a), Ordering::Equal);
294
295        // u64 above i64::MAX vs a smaller value.
296        let big = json!(u64::MAX);
297        let small = json!(1_i64);
298        assert_eq!(collate(&small, &big), Ordering::Less);
299
300        // Mixed integer / float still orders correctly.
301        assert_eq!(collate(&json!(2_i64), &json!(1.5)), Ordering::Greater);
302    }
303
304    #[test]
305    fn string_ordering() {
306        assert_eq!(collate(&json!("a"), &json!("b")), Ordering::Less);
307        assert_eq!(collate(&json!("aa"), &json!("b")), Ordering::Less);
308    }
309
310    #[test]
311    fn array_ordering() {
312        assert_eq!(collate(&json!([]), &json!([1])), Ordering::Less);
313        assert_eq!(collate(&json!([1]), &json!([2])), Ordering::Less);
314        assert_eq!(collate(&json!([1]), &json!([1, 2])), Ordering::Less);
315    }
316
317    #[test]
318    fn object_ordering() {
319        assert_eq!(collate(&json!({}), &json!({"a": 1})), Ordering::Less);
320        assert_eq!(collate(&json!({"a": 1}), &json!({"a": 2})), Ordering::Less);
321        assert_eq!(collate(&json!({"a": 1}), &json!({"b": 1})), Ordering::Less);
322    }
323
324    #[test]
325    fn indexable_string_preserves_order() {
326        let values = vec![
327            json!(null),
328            json!(false),
329            json!(true),
330            json!(0),
331            json!(1),
332            json!(100),
333            json!("a"),
334            json!("b"),
335            json!([]),
336            json!({}),
337        ];
338
339        let encoded: Vec<String> = values.iter().map(to_indexable_string).collect();
340
341        for i in 0..encoded.len() {
342            for j in (i + 1)..encoded.len() {
343                assert!(
344                    encoded[i] < encoded[j],
345                    "encoded({:?}) = {:?} should be < encoded({:?}) = {:?}",
346                    values[i],
347                    encoded[i],
348                    values[j],
349                    encoded[j]
350                );
351            }
352        }
353    }
354
355    #[test]
356    fn indexable_string_negative_numbers() {
357        let small = to_indexable_string(&json!(-100));
358        let big = to_indexable_string(&json!(-1));
359        let zero = to_indexable_string(&json!(0));
360        assert!(small < big, "-100 should sort before -1");
361        assert!(big < zero, "-1 should sort before 0");
362    }
363}