Skip to main content

rouchdb_core/
collation.rs

1/// CouchDB collation order.
2///
3/// CouchDB (and PouchDB) use an Erlang-derived ordering for keys:
4///
5/// ```text
6/// null < boolean < number < string < array < object
7/// ```
8///
9/// This module provides comparison and encoding functions that match this
10/// ordering, ensuring consistent behavior across local storage and remote
11/// CouchDB instances.
12
13use serde_json::Value;
14use std::cmp::Ordering;
15
16// ---------------------------------------------------------------------------
17// Type ranking
18// ---------------------------------------------------------------------------
19
20/// Numeric type rank matching CouchDB collation.
21fn type_rank(v: &Value) -> u8 {
22    match v {
23        Value::Null => 1,
24        Value::Bool(_) => 2,
25        Value::Number(_) => 3,
26        Value::String(_) => 4,
27        Value::Array(_) => 5,
28        Value::Object(_) => 6,
29    }
30}
31
32// ---------------------------------------------------------------------------
33// Comparison
34// ---------------------------------------------------------------------------
35
36/// Compare two JSON values using CouchDB collation order.
37pub fn collate(a: &Value, b: &Value) -> Ordering {
38    let rank_a = type_rank(a);
39    let rank_b = type_rank(b);
40
41    if rank_a != rank_b {
42        return rank_a.cmp(&rank_b);
43    }
44
45    match (a, b) {
46        (Value::Null, Value::Null) => Ordering::Equal,
47        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
48        (Value::Number(a), Value::Number(b)) => {
49            let fa = a.as_f64().unwrap_or(0.0);
50            let fb = b.as_f64().unwrap_or(0.0);
51            fa.partial_cmp(&fb).unwrap_or(Ordering::Equal)
52        }
53        (Value::String(a), Value::String(b)) => a.cmp(b),
54        (Value::Array(a), Value::Array(b)) => {
55            // Element-by-element, shorter arrays sort first
56            for (ea, eb) in a.iter().zip(b.iter()) {
57                match collate(ea, eb) {
58                    Ordering::Equal => continue,
59                    other => return other,
60                }
61            }
62            a.len().cmp(&b.len())
63        }
64        (Value::Object(a), Value::Object(b)) => {
65            // Key-by-key comparison; fewer keys sort first.
66            // Keys are sorted before comparison.
67            let mut keys_a: Vec<&String> = a.keys().collect();
68            let mut keys_b: Vec<&String> = b.keys().collect();
69            keys_a.sort();
70            keys_b.sort();
71
72            for (ka, kb) in keys_a.iter().zip(keys_b.iter()) {
73                match ka.cmp(kb) {
74                    Ordering::Equal => {}
75                    other => return other,
76                }
77                match collate(&a[*ka], &b[*kb]) {
78                    Ordering::Equal => continue,
79                    other => return other,
80                }
81            }
82            a.len().cmp(&b.len())
83        }
84        _ => Ordering::Equal, // Should be unreachable due to rank check
85    }
86}
87
88// ---------------------------------------------------------------------------
89// Indexable string encoding
90// ---------------------------------------------------------------------------
91
92/// Encode a JSON value into a string that sorts lexicographically in CouchDB
93/// collation order. Used as keys in the storage engine.
94///
95/// Format:
96/// - Null:    `"1"`
97/// - Bool:    `"2F"` / `"2T"`
98/// - Number:  `"3"` + encoded number
99/// - String:  `"4"` + string value
100/// - Array:   `"5"` + encoded elements separated by null byte
101/// - Object:  `"6"` + encoded key-value pairs
102pub fn to_indexable_string(v: &Value) -> String {
103    let mut s = String::new();
104    encode_value(v, &mut s);
105    s
106}
107
108fn encode_value(v: &Value, out: &mut String) {
109    match v {
110        Value::Null => out.push('1'),
111        Value::Bool(b) => {
112            out.push('2');
113            out.push(if *b { 'T' } else { 'F' });
114        }
115        Value::Number(n) => {
116            out.push('3');
117            encode_number(n.as_f64().unwrap_or(0.0), out);
118        }
119        Value::String(s) => {
120            out.push('4');
121            out.push_str(s);
122        }
123        Value::Array(arr) => {
124            out.push('5');
125            for (i, elem) in arr.iter().enumerate() {
126                if i > 0 {
127                    out.push('\0');
128                }
129                encode_value(elem, out);
130            }
131        }
132        Value::Object(obj) => {
133            out.push('6');
134            let mut keys: Vec<&String> = obj.keys().collect();
135            keys.sort();
136            for (i, key) in keys.iter().enumerate() {
137                if i > 0 {
138                    out.push('\0');
139                }
140                out.push_str(key);
141                out.push('\0');
142                encode_value(&obj[*key], out);
143            }
144        }
145    }
146}
147
148/// Encode a number such that the resulting string sorts lexicographically
149/// in numeric order.
150///
151/// Scheme (matching PouchDB's `numToIndexableString`):
152/// - Negative numbers: `0` + inverted representation
153/// - Zero: `1`
154/// - Positive numbers: `2` + magnitude (zero-padded to 3 digits) + mantissa
155fn encode_number(n: f64, out: &mut String) {
156    if n == 0.0 {
157        out.push('1');
158        return;
159    }
160
161    let is_negative = n < 0.0;
162    let abs_n = n.abs();
163
164    // Represent as: mantissa * 10^exponent
165    // where 1 <= mantissa < 10
166    let exponent = abs_n.log10().floor() as i64;
167    let mantissa = abs_n / 10f64.powi(exponent as i32);
168
169    // We offset the exponent by 10000 so it's always positive and
170    // zero-pad to 5 digits for consistent lexicographic ordering.
171    if is_negative {
172        // For negatives: invert so larger (closer to zero) sorts first
173        // Prefix with '0', then (10000 - exponent), then (10 - mantissa)
174        out.push('0');
175        let inv_exp = 10000 - exponent;
176        out.push_str(&format!("{:0>5}", inv_exp));
177        let inv_mantissa = 10.0 - mantissa;
178        let m_str = format!("{:.10}", inv_mantissa);
179        out.push_str(m_str.trim_end_matches('0'));
180    } else {
181        // For positives: prefix with '2', then (exponent + 10000), then mantissa
182        out.push('2');
183        let adj_exp = exponent + 10000;
184        out.push_str(&format!("{:0>5}", adj_exp));
185        let m_str = format!("{:.10}", mantissa);
186        out.push_str(m_str.trim_end_matches('0'));
187    }
188}
189
190// ---------------------------------------------------------------------------
191// Tests
192// ---------------------------------------------------------------------------
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197    use serde_json::json;
198
199    #[test]
200    fn type_ordering() {
201        let values = vec![
202            json!(null),
203            json!(false),
204            json!(true),
205            json!(-1),
206            json!(0),
207            json!(1),
208            json!(1.5),
209            json!(""),
210            json!("a"),
211            json!("b"),
212            json!([]),
213            json!([1]),
214            json!({}),
215            json!({"a": 1}),
216        ];
217
218        for i in 0..values.len() {
219            for j in (i + 1)..values.len() {
220                assert_eq!(
221                    collate(&values[i], &values[j]),
222                    Ordering::Less,
223                    "{:?} should be less than {:?}",
224                    values[i],
225                    values[j]
226                );
227            }
228        }
229    }
230
231    #[test]
232    fn null_equality() {
233        assert_eq!(collate(&json!(null), &json!(null)), Ordering::Equal);
234    }
235
236    #[test]
237    fn bool_ordering() {
238        assert_eq!(collate(&json!(false), &json!(true)), Ordering::Less);
239        assert_eq!(collate(&json!(true), &json!(true)), Ordering::Equal);
240    }
241
242    #[test]
243    fn number_ordering() {
244        assert_eq!(collate(&json!(-100), &json!(-1)), Ordering::Less);
245        assert_eq!(collate(&json!(0), &json!(1)), Ordering::Less);
246        assert_eq!(collate(&json!(1), &json!(2)), Ordering::Less);
247        assert_eq!(collate(&json!(1.5), &json!(2)), Ordering::Less);
248    }
249
250    #[test]
251    fn string_ordering() {
252        assert_eq!(collate(&json!("a"), &json!("b")), Ordering::Less);
253        assert_eq!(collate(&json!("aa"), &json!("b")), Ordering::Less);
254    }
255
256    #[test]
257    fn array_ordering() {
258        assert_eq!(collate(&json!([]), &json!([1])), Ordering::Less);
259        assert_eq!(collate(&json!([1]), &json!([2])), Ordering::Less);
260        assert_eq!(collate(&json!([1]), &json!([1, 2])), Ordering::Less);
261    }
262
263    #[test]
264    fn object_ordering() {
265        assert_eq!(collate(&json!({}), &json!({"a": 1})), Ordering::Less);
266        assert_eq!(
267            collate(&json!({"a": 1}), &json!({"a": 2})),
268            Ordering::Less
269        );
270        assert_eq!(
271            collate(&json!({"a": 1}), &json!({"b": 1})),
272            Ordering::Less
273        );
274    }
275
276    #[test]
277    fn indexable_string_preserves_order() {
278        let values = vec![
279            json!(null),
280            json!(false),
281            json!(true),
282            json!(0),
283            json!(1),
284            json!(100),
285            json!("a"),
286            json!("b"),
287            json!([]),
288            json!({}),
289        ];
290
291        let encoded: Vec<String> = values.iter().map(|v| to_indexable_string(v)).collect();
292
293        for i in 0..encoded.len() {
294            for j in (i + 1)..encoded.len() {
295                assert!(
296                    encoded[i] < encoded[j],
297                    "encoded({:?}) = {:?} should be < encoded({:?}) = {:?}",
298                    values[i],
299                    encoded[i],
300                    values[j],
301                    encoded[j]
302                );
303            }
304        }
305    }
306
307    #[test]
308    fn indexable_string_negative_numbers() {
309        let small = to_indexable_string(&json!(-100));
310        let big = to_indexable_string(&json!(-1));
311        let zero = to_indexable_string(&json!(0));
312        assert!(small < big, "-100 should sort before -1");
313        assert!(big < zero, "-1 should sort before 0");
314    }
315}