Skip to main content

rouchdb_core/
collation.rs

1/// CouchDB collation order.
2///
3/// CouchDB (and PouchDB) use an Erlang-derived ordering for keys:
4///
5/// ```text
6/// null < boolean < number < string < array < object
7/// ```
8///
9/// This module provides comparison and encoding functions that match this
10/// ordering, ensuring consistent behavior across local storage and remote
11/// CouchDB instances.
12use serde_json::Value;
13use std::cmp::Ordering;
14
15// ---------------------------------------------------------------------------
16// Type ranking
17// ---------------------------------------------------------------------------
18
19/// Numeric type rank matching CouchDB collation.
20fn type_rank(v: &Value) -> u8 {
21    match v {
22        Value::Null => 1,
23        Value::Bool(_) => 2,
24        Value::Number(_) => 3,
25        Value::String(_) => 4,
26        Value::Array(_) => 5,
27        Value::Object(_) => 6,
28    }
29}
30
31// ---------------------------------------------------------------------------
32// Comparison
33// ---------------------------------------------------------------------------
34
35/// Compare two JSON values using CouchDB collation order.
36pub fn collate(a: &Value, b: &Value) -> Ordering {
37    let rank_a = type_rank(a);
38    let rank_b = type_rank(b);
39
40    if rank_a != rank_b {
41        return rank_a.cmp(&rank_b);
42    }
43
44    match (a, b) {
45        (Value::Null, Value::Null) => Ordering::Equal,
46        (Value::Bool(a), Value::Bool(b)) => a.cmp(b),
47        (Value::Number(a), Value::Number(b)) => {
48            let fa = a.as_f64().unwrap_or(0.0);
49            let fb = b.as_f64().unwrap_or(0.0);
50            // total_cmp gives a well-defined ordering for all f64 values
51            // including NaN (which shouldn't appear in JSON but be safe)
52            fa.total_cmp(&fb)
53        }
54        (Value::String(a), Value::String(b)) => a.cmp(b),
55        (Value::Array(a), Value::Array(b)) => {
56            // Element-by-element, shorter arrays sort first
57            for (ea, eb) in a.iter().zip(b.iter()) {
58                match collate(ea, eb) {
59                    Ordering::Equal => continue,
60                    other => return other,
61                }
62            }
63            a.len().cmp(&b.len())
64        }
65        (Value::Object(a), Value::Object(b)) => {
66            // Key-by-key comparison; fewer keys sort first.
67            // Keys are sorted before comparison.
68            let mut keys_a: Vec<&String> = a.keys().collect();
69            let mut keys_b: Vec<&String> = b.keys().collect();
70            keys_a.sort();
71            keys_b.sort();
72
73            for (ka, kb) in keys_a.iter().zip(keys_b.iter()) {
74                match ka.cmp(kb) {
75                    Ordering::Equal => {}
76                    other => return other,
77                }
78                match collate(&a[*ka], &b[*kb]) {
79                    Ordering::Equal => continue,
80                    other => return other,
81                }
82            }
83            a.len().cmp(&b.len())
84        }
85        _ => Ordering::Equal, // Should be unreachable due to rank check
86    }
87}
88
89// ---------------------------------------------------------------------------
90// Indexable string encoding
91// ---------------------------------------------------------------------------
92
93/// Encode a JSON value into a string that sorts lexicographically in CouchDB
94/// collation order. Used as keys in the storage engine.
95///
96/// Format:
97/// - Null:    `"1"`
98/// - Bool:    `"2F"` / `"2T"`
99/// - Number:  `"3"` + encoded number
100/// - String:  `"4"` + string value
101/// - Array:   `"5"` + encoded elements separated by null byte
102/// - Object:  `"6"` + encoded key-value pairs
103pub fn to_indexable_string(v: &Value) -> String {
104    let mut s = String::new();
105    encode_value(v, &mut s);
106    s
107}
108
109fn encode_value(v: &Value, out: &mut String) {
110    match v {
111        Value::Null => out.push('1'),
112        Value::Bool(b) => {
113            out.push('2');
114            out.push(if *b { 'T' } else { 'F' });
115        }
116        Value::Number(n) => {
117            out.push('3');
118            encode_number(n.as_f64().unwrap_or(0.0), out);
119        }
120        Value::String(s) => {
121            out.push('4');
122            out.push_str(s);
123        }
124        Value::Array(arr) => {
125            out.push('5');
126            for (i, elem) in arr.iter().enumerate() {
127                if i > 0 {
128                    out.push('\0');
129                }
130                encode_value(elem, out);
131            }
132        }
133        Value::Object(obj) => {
134            out.push('6');
135            let mut keys: Vec<&String> = obj.keys().collect();
136            keys.sort();
137            for (i, key) in keys.iter().enumerate() {
138                if i > 0 {
139                    out.push('\0');
140                }
141                out.push_str(key);
142                out.push('\0');
143                encode_value(&obj[*key], out);
144            }
145        }
146    }
147}
148
149/// Encode a number such that the resulting string sorts lexicographically
150/// in numeric order.
151///
152/// Scheme (matching PouchDB's `numToIndexableString`):
153/// - Negative numbers: `0` + inverted representation
154/// - Zero: `1`
155/// - Positive numbers: `2` + magnitude (zero-padded to 3 digits) + mantissa
156fn encode_number(n: f64, out: &mut String) {
157    if n.is_nan() {
158        out.push('0'); // Sort NaN before all real numbers
159        return;
160    }
161    if n == f64::NEG_INFINITY {
162        out.push('0');
163        out.push_str("00000");
164        return;
165    }
166    if n == f64::INFINITY {
167        out.push('2');
168        out.push_str("99999");
169        return;
170    }
171    if n == 0.0 {
172        out.push('1');
173        return;
174    }
175
176    let is_negative = n < 0.0;
177    let abs_n = n.abs();
178
179    // Represent as: mantissa * 10^exponent
180    // where 1 <= mantissa < 10
181    let exponent = abs_n.log10().floor() as i64;
182    let mantissa = abs_n / 10f64.powi(exponent as i32);
183
184    // We offset the exponent by 10000 so it's always positive and
185    // zero-pad to 5 digits for consistent lexicographic ordering.
186    if is_negative {
187        // For negatives: invert so larger (closer to zero) sorts first
188        // Prefix with '0', then (10000 - exponent), then (10 - mantissa)
189        out.push('0');
190        let inv_exp = 10000 - exponent;
191        out.push_str(&format!("{:0>5}", inv_exp));
192        let inv_mantissa = 10.0 - mantissa;
193        let m_str = format!("{:.10}", inv_mantissa);
194        out.push_str(m_str.trim_end_matches('0'));
195    } else {
196        // For positives: prefix with '2', then (exponent + 10000), then mantissa
197        out.push('2');
198        let adj_exp = exponent + 10000;
199        out.push_str(&format!("{:0>5}", adj_exp));
200        let m_str = format!("{:.10}", mantissa);
201        out.push_str(m_str.trim_end_matches('0'));
202    }
203}
204
205// ---------------------------------------------------------------------------
206// Tests
207// ---------------------------------------------------------------------------
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212    use serde_json::json;
213
214    #[test]
215    fn type_ordering() {
216        let values = vec![
217            json!(null),
218            json!(false),
219            json!(true),
220            json!(-1),
221            json!(0),
222            json!(1),
223            json!(1.5),
224            json!(""),
225            json!("a"),
226            json!("b"),
227            json!([]),
228            json!([1]),
229            json!({}),
230            json!({"a": 1}),
231        ];
232
233        for i in 0..values.len() {
234            for j in (i + 1)..values.len() {
235                assert_eq!(
236                    collate(&values[i], &values[j]),
237                    Ordering::Less,
238                    "{:?} should be less than {:?}",
239                    values[i],
240                    values[j]
241                );
242            }
243        }
244    }
245
246    #[test]
247    fn null_equality() {
248        assert_eq!(collate(&json!(null), &json!(null)), Ordering::Equal);
249    }
250
251    #[test]
252    fn bool_ordering() {
253        assert_eq!(collate(&json!(false), &json!(true)), Ordering::Less);
254        assert_eq!(collate(&json!(true), &json!(true)), Ordering::Equal);
255    }
256
257    #[test]
258    fn number_ordering() {
259        assert_eq!(collate(&json!(-100), &json!(-1)), Ordering::Less);
260        assert_eq!(collate(&json!(0), &json!(1)), Ordering::Less);
261        assert_eq!(collate(&json!(1), &json!(2)), Ordering::Less);
262        assert_eq!(collate(&json!(1.5), &json!(2)), Ordering::Less);
263    }
264
265    #[test]
266    fn string_ordering() {
267        assert_eq!(collate(&json!("a"), &json!("b")), Ordering::Less);
268        assert_eq!(collate(&json!("aa"), &json!("b")), Ordering::Less);
269    }
270
271    #[test]
272    fn array_ordering() {
273        assert_eq!(collate(&json!([]), &json!([1])), Ordering::Less);
274        assert_eq!(collate(&json!([1]), &json!([2])), Ordering::Less);
275        assert_eq!(collate(&json!([1]), &json!([1, 2])), Ordering::Less);
276    }
277
278    #[test]
279    fn object_ordering() {
280        assert_eq!(collate(&json!({}), &json!({"a": 1})), Ordering::Less);
281        assert_eq!(collate(&json!({"a": 1}), &json!({"a": 2})), Ordering::Less);
282        assert_eq!(collate(&json!({"a": 1}), &json!({"b": 1})), Ordering::Less);
283    }
284
285    #[test]
286    fn indexable_string_preserves_order() {
287        let values = vec![
288            json!(null),
289            json!(false),
290            json!(true),
291            json!(0),
292            json!(1),
293            json!(100),
294            json!("a"),
295            json!("b"),
296            json!([]),
297            json!({}),
298        ];
299
300        let encoded: Vec<String> = values.iter().map(to_indexable_string).collect();
301
302        for i in 0..encoded.len() {
303            for j in (i + 1)..encoded.len() {
304                assert!(
305                    encoded[i] < encoded[j],
306                    "encoded({:?}) = {:?} should be < encoded({:?}) = {:?}",
307                    values[i],
308                    encoded[i],
309                    values[j],
310                    encoded[j]
311                );
312            }
313        }
314    }
315
316    #[test]
317    fn indexable_string_negative_numbers() {
318        let small = to_indexable_string(&json!(-100));
319        let big = to_indexable_string(&json!(-1));
320        let zero = to_indexable_string(&json!(0));
321        assert!(small < big, "-100 should sort before -1");
322        assert!(big < zero, "-1 should sort before 0");
323    }
324}