Skip to main content

ankurah_core/
collation.rs

1use std::cmp::Ordering;
2
3use ankql::ast;
4use ankurah_proto::EntityId;
5/// Represents a bound in a range query
6#[derive(Debug, Clone, PartialEq)]
7pub enum RangeBound<T> {
8    Included(T),
9    Excluded(T),
10    Unbounded,
11}
12
13/// Trait for types that support collation operations
14pub trait Collatable {
15    /// Convert the value to its binary representation for collation
16    fn to_bytes(&self) -> Vec<u8>;
17
18    /// Returns the immediate successor's binary representation if one exists
19    fn successor_bytes(&self) -> Option<Vec<u8>>;
20
21    /// Returns the immediate predecessor's binary representation if one exists
22    fn predecessor_bytes(&self) -> Option<Vec<u8>>;
23
24    /// Returns true if this value represents a minimum bound in its domain
25    fn is_minimum(&self) -> bool;
26
27    /// Returns true if this value represents a maximum bound in its domain
28    fn is_maximum(&self) -> bool;
29
30    /// Compare two values in the collation order
31    fn compare(&self, other: &Self) -> Ordering { self.to_bytes().cmp(&other.to_bytes()) }
32
33    fn is_in_range(&self, lower: RangeBound<&Self>, upper: RangeBound<&Self>) -> bool {
34        match (lower, upper) {
35            (RangeBound::Included(l), RangeBound::Included(u)) => self.compare(l) != Ordering::Less && self.compare(u) != Ordering::Greater,
36            (RangeBound::Included(l), RangeBound::Excluded(u)) => self.compare(l) != Ordering::Less && self.compare(u) == Ordering::Less,
37            (RangeBound::Excluded(l), RangeBound::Included(u)) => {
38                self.compare(l) == Ordering::Greater && self.compare(u) != Ordering::Greater
39            }
40            (RangeBound::Excluded(l), RangeBound::Excluded(u)) => self.compare(l) == Ordering::Greater && self.compare(u) == Ordering::Less,
41            (RangeBound::Unbounded, RangeBound::Included(u)) => self.compare(u) != Ordering::Greater,
42            (RangeBound::Unbounded, RangeBound::Excluded(u)) => self.compare(u) == Ordering::Less,
43            (RangeBound::Included(l), RangeBound::Unbounded) => self.compare(l) != Ordering::Less,
44            (RangeBound::Excluded(l), RangeBound::Unbounded) => self.compare(l) == Ordering::Greater,
45            (RangeBound::Unbounded, RangeBound::Unbounded) => true,
46        }
47    }
48}
49
50impl Collatable for ast::Literal {
51    fn to_bytes(&self) -> Vec<u8> {
52        match self {
53            ast::Literal::String(s) => s.as_bytes().to_vec(),
54            ast::Literal::I16(i) => i.to_be_bytes().to_vec(),
55            ast::Literal::I32(i) => i.to_be_bytes().to_vec(),
56            ast::Literal::I64(i) => i.to_be_bytes().to_vec(),
57            ast::Literal::F64(f) => {
58                let bits = if f.is_nan() {
59                    u64::MAX // NaN sorts last
60                } else {
61                    let bits = f.to_bits();
62                    if *f >= 0.0 {
63                        bits ^ (1 << 63) // Flip sign bit for positive numbers
64                    } else {
65                        !bits // Flip all bits for negative numbers
66                    }
67                };
68                bits.to_be_bytes().to_vec()
69            }
70            ast::Literal::Bool(b) => vec![*b as u8],
71            ast::Literal::EntityId(ulid) => ulid.to_bytes().to_vec(),
72            ast::Literal::Object(bytes) => bytes.clone(),
73            ast::Literal::Binary(bytes) => bytes.clone(),
74            ast::Literal::Json(json) => serde_json::to_vec(json).unwrap_or_default(),
75        }
76    }
77
78    fn successor_bytes(&self) -> Option<Vec<u8>> {
79        match self {
80            ast::Literal::String(s) => {
81                if s.is_empty() {
82                    let mut bytes = s.as_bytes().to_vec();
83                    // TODO - I think this is wrong. We shouldn't just push a byte. We should increment by one bit perhaps?
84                    // It also occurs to me that we need a fixed length for strings in order collate properly.
85                    bytes.push(0);
86                    Some(bytes)
87                } else {
88                    let mut bytes = s.as_bytes().to_vec();
89                    // TODO - I think this is wrong
90                    bytes.push(0);
91                    Some(bytes)
92                }
93            }
94            ast::Literal::I16(i) => {
95                if *i == i16::MAX {
96                    None
97                } else {
98                    Some((i + 1).to_be_bytes().to_vec())
99                }
100            }
101            ast::Literal::I32(i) => {
102                if *i == i32::MAX {
103                    None
104                } else {
105                    Some((i + 1).to_be_bytes().to_vec())
106                }
107            }
108            ast::Literal::I64(i) => {
109                if *i == i64::MAX {
110                    None
111                } else {
112                    Some((i + 1).to_be_bytes().to_vec())
113                }
114            }
115            ast::Literal::F64(f) => {
116                if f.is_nan() || (f.is_infinite() && *f > 0.0) {
117                    None
118                } else {
119                    let bits = if *f >= 0.0 { f.to_bits() ^ (1 << 63) } else { !f.to_bits() };
120                    let next_bits = bits + 1;
121                    Some(next_bits.to_be_bytes().to_vec())
122                }
123            }
124            ast::Literal::Bool(b) => {
125                if !b {
126                    None
127                } else {
128                    Some(vec![1])
129                }
130            }
131            ast::Literal::EntityId(ulid) => {
132                let mut bytes = ulid.to_bytes();
133                // Find the rightmost byte that can be incremented
134                for i in (0..bytes.len()).rev() {
135                    if bytes[i] < 255 {
136                        bytes[i] += 1;
137                        // Zero out all bytes to the right
138                        for j in (i + 1)..bytes.len() {
139                            bytes[j] = 0;
140                        }
141                        return Some(bytes.to_vec());
142                    }
143                }
144                None // All bytes are 255, no successor
145            }
146            ast::Literal::Object(bytes) | ast::Literal::Binary(bytes) => {
147                let mut bytes = bytes.clone();
148                // Find the rightmost byte that can be incremented
149                for i in (0..bytes.len()).rev() {
150                    if bytes[i] < 255 {
151                        bytes[i] += 1;
152                        // Zero out all bytes to the right
153                        for j in (i + 1)..bytes.len() {
154                            bytes[j] = 0;
155                        }
156                        return Some(bytes);
157                    }
158                }
159                // All bytes are 255, append a zero byte
160                bytes.push(0);
161                Some(bytes)
162            }
163            // JSON doesn't support successor/predecessor (dynamic types)
164            ast::Literal::Json(_) => None,
165        }
166    }
167
168    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
169        match self {
170            ast::Literal::String(s) => {
171                if s.is_empty() {
172                    None
173                } else {
174                    let bytes = s.as_bytes();
175                    Some(bytes[..bytes.len() - 1].to_vec())
176                }
177            }
178            ast::Literal::I16(i) => {
179                if *i == i16::MIN {
180                    None
181                } else {
182                    Some((i - 1).to_be_bytes().to_vec())
183                }
184            }
185            ast::Literal::I32(i) => {
186                if *i == i32::MIN {
187                    None
188                } else {
189                    Some((i - 1).to_be_bytes().to_vec())
190                }
191            }
192            ast::Literal::I64(i) => {
193                if *i == i64::MIN {
194                    None
195                } else {
196                    Some((i - 1).to_be_bytes().to_vec())
197                }
198            }
199            ast::Literal::F64(f) => {
200                if f.is_nan() || (f.is_infinite() && *f < 0.0) {
201                    None
202                } else {
203                    let bits = if *f >= 0.0 { f.to_bits() ^ (1 << 63) } else { !f.to_bits() };
204                    let prev_bits = bits - 1;
205                    Some(prev_bits.to_be_bytes().to_vec())
206                }
207            }
208            ast::Literal::Bool(b) => {
209                if *b {
210                    Some(vec![0])
211                } else {
212                    None
213                }
214            }
215            ast::Literal::EntityId(ulid) => {
216                let mut bytes = ulid.to_bytes();
217                // Find the rightmost byte that can be decremented
218                for i in (0..bytes.len()).rev() {
219                    if bytes[i] > 0 {
220                        bytes[i] -= 1;
221                        // Set all bytes to the right to 255
222                        for j in (i + 1)..bytes.len() {
223                            bytes[j] = 255;
224                        }
225                        return Some(bytes.to_vec());
226                    }
227                }
228                None // All bytes are 0, no predecessor
229            }
230            ast::Literal::Object(bytes) | ast::Literal::Binary(bytes) => {
231                if bytes.is_empty() {
232                    None
233                } else {
234                    let mut bytes = bytes.clone();
235                    // Find the rightmost byte that can be decremented
236                    for i in (0..bytes.len()).rev() {
237                        if bytes[i] > 0 {
238                            bytes[i] -= 1;
239                            // Set all bytes to the right to 255
240                            for j in (i + 1)..bytes.len() {
241                                bytes[j] = 255;
242                            }
243                            return Some(bytes);
244                        }
245                    }
246                    // All bytes are 0, remove the last byte
247                    if bytes.len() > 1 {
248                        bytes.pop();
249                        Some(bytes)
250                    } else {
251                        None
252                    }
253                }
254            }
255            // JSON doesn't support successor/predecessor (dynamic types)
256            ast::Literal::Json(_) => None,
257        }
258    }
259
260    fn is_minimum(&self) -> bool {
261        match self {
262            ast::Literal::String(s) => s.is_empty(),
263            ast::Literal::I16(i) => *i == i16::MIN,
264            ast::Literal::I32(i) => *i == i32::MIN,
265            ast::Literal::I64(i) => *i == i64::MIN,
266            ast::Literal::F64(f) => *f == f64::NEG_INFINITY,
267            ast::Literal::Bool(b) => !b,
268            ast::Literal::EntityId(ulid) => ulid.to_bytes().iter().all(|&b| b == 0),
269            ast::Literal::Object(bytes) | ast::Literal::Binary(bytes) => bytes.is_empty(),
270            ast::Literal::Json(_) => false, // JSON doesn't have a minimum
271        }
272    }
273
274    fn is_maximum(&self) -> bool {
275        match self {
276            ast::Literal::String(_) => false, // Strings have no theoretical maximum
277            ast::Literal::I16(i) => *i == i16::MAX,
278            ast::Literal::I32(i) => *i == i32::MAX,
279            ast::Literal::I64(i) => *i == i64::MAX,
280            ast::Literal::F64(f) => *f == f64::INFINITY,
281            ast::Literal::Bool(b) => *b,
282            ast::Literal::EntityId(ulid) => ulid.to_bytes().iter().all(|&b| b == 255),
283            ast::Literal::Object(_) | ast::Literal::Binary(_) => false, // No theoretical maximum
284            ast::Literal::Json(_) => false,                             // JSON doesn't have a maximum
285        }
286    }
287}
288
289// // Implementation for strings
290impl Collatable for &str {
291    fn to_bytes(&self) -> Vec<u8> { self.as_bytes().to_vec() }
292
293    fn successor_bytes(&self) -> Option<Vec<u8>> {
294        if self.is_maximum() {
295            None
296        } else {
297            let mut bytes = self.as_bytes().to_vec();
298            bytes.push(0);
299            Some(bytes)
300        }
301    }
302
303    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
304        if self.is_minimum() {
305            None
306        } else {
307            let bytes = self.as_bytes();
308            if bytes.is_empty() {
309                None
310            } else {
311                Some(bytes[..bytes.len() - 1].to_vec())
312            }
313        }
314    }
315
316    fn is_minimum(&self) -> bool { self.is_empty() }
317
318    fn is_maximum(&self) -> bool {
319        false // Strings have no theoretical maximum
320    }
321}
322
323// Implementation for integers
324impl Collatable for i64 {
325    fn to_bytes(&self) -> Vec<u8> {
326        // Use big-endian encoding to preserve ordering
327        self.to_be_bytes().to_vec()
328    }
329
330    fn successor_bytes(&self) -> Option<Vec<u8>> {
331        if self == &i64::MAX {
332            None
333        } else {
334            Some((self + 1).to_be_bytes().to_vec())
335        }
336    }
337
338    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
339        if self == &i64::MIN {
340            None
341        } else {
342            Some((self - 1).to_be_bytes().to_vec())
343        }
344    }
345
346    fn is_minimum(&self) -> bool { *self == i64::MIN }
347
348    fn is_maximum(&self) -> bool { *self == i64::MAX }
349}
350
351// Implementation for floats
352impl Collatable for f64 {
353    fn to_bytes(&self) -> Vec<u8> {
354        let bits = if self.is_nan() {
355            u64::MAX // NaN sorts last
356        } else {
357            let bits = self.to_bits();
358            if *self >= 0.0 {
359                bits ^ (1 << 63) // Flip sign bit for positive numbers
360            } else {
361                !bits // Flip all bits for negative numbers
362            }
363        };
364        bits.to_be_bytes().to_vec()
365    }
366
367    fn successor_bytes(&self) -> Option<Vec<u8>> {
368        if self.is_nan() || (self.is_infinite() && *self > 0.0) {
369            None
370        } else {
371            let bits = if *self >= 0.0 {
372                self.to_bits() ^ (1 << 63) // Apply same sign bit flip as to_bytes
373            } else {
374                !self.to_bits() // Apply same bit inversion as to_bytes
375            };
376            let next_bits = bits + 1;
377            Some(next_bits.to_be_bytes().to_vec())
378        }
379    }
380
381    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
382        if self.is_nan() || (self.is_infinite() && *self < 0.0) {
383            None
384        } else {
385            let bits = if *self >= 0.0 {
386                self.to_bits() ^ (1 << 63) // Apply same sign bit flip as to_bytes
387            } else {
388                !self.to_bits() // Apply same bit inversion as to_bytes
389            };
390            let prev_bits = bits - 1;
391            Some(prev_bits.to_be_bytes().to_vec())
392        }
393    }
394
395    fn is_minimum(&self) -> bool { *self == f64::NEG_INFINITY }
396
397    fn is_maximum(&self) -> bool { *self == f64::INFINITY }
398}
399
400// Implementation for EntityId (ULIDs have natural lexicographic ordering)
401impl Collatable for EntityId {
402    fn to_bytes(&self) -> Vec<u8> {
403        // EntityId is based on ULID which has lexicographic ordering
404        self.to_bytes().to_vec()
405    }
406
407    fn successor_bytes(&self) -> Option<Vec<u8>> {
408        if self.is_maximum() {
409            None
410        } else {
411            let mut bytes = self.to_bytes();
412            // Find the rightmost byte that can be incremented
413            for i in (0..bytes.len()).rev() {
414                if bytes[i] < 255 {
415                    bytes[i] += 1;
416                    // Zero out all bytes to the right
417                    for j in (i + 1)..bytes.len() {
418                        bytes[j] = 0;
419                    }
420                    return Some(bytes.to_vec());
421                }
422            }
423            None // All bytes are 255, no successor
424        }
425    }
426
427    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
428        if self.is_minimum() {
429            None
430        } else {
431            let mut bytes = self.to_bytes();
432            // Find the rightmost byte that can be decremented
433            for i in (0..bytes.len()).rev() {
434                if bytes[i] > 0 {
435                    bytes[i] -= 1;
436                    // Set all bytes to the right to 255
437                    for j in (i + 1)..bytes.len() {
438                        bytes[j] = 255;
439                    }
440                    return Some(bytes.to_vec());
441                }
442            }
443            None // All bytes are 0, no predecessor
444        }
445    }
446
447    fn is_minimum(&self) -> bool { self.to_bytes().iter().all(|&b| b == 0) }
448
449    fn is_maximum(&self) -> bool { self.to_bytes().iter().all(|&b| b == 255) }
450}
451
452#[cfg(test)]
453mod tests {
454    use super::*;
455
456    #[test]
457    fn test_string_collation() {
458        let s = "hello";
459        assert!(s.successor_bytes().unwrap() > s.to_bytes());
460        assert!(s.predecessor_bytes().unwrap() < s.to_bytes());
461        assert!(!s.is_minimum());
462        assert!(!s.is_maximum());
463
464        let empty = "";
465        assert!(empty.is_minimum());
466        assert!(empty.predecessor_bytes().is_none());
467    }
468
469    #[test]
470    fn test_integer_collation() {
471        let n = 42i64;
472        assert_eq!(i64::from_be_bytes(n.successor_bytes().unwrap().try_into().unwrap()), 43);
473        assert_eq!(i64::from_be_bytes(n.predecessor_bytes().unwrap().try_into().unwrap()), 41);
474        assert!(!n.is_minimum());
475        assert!(!n.is_maximum());
476
477        assert!(i64::MAX.successor_bytes().is_none());
478        assert!(i64::MIN.predecessor_bytes().is_none());
479        assert!(i64::MAX.is_maximum());
480        assert!(i64::MIN.is_minimum());
481    }
482
483    #[test]
484    fn test_float_collation() {
485        let f = 1.0f64;
486        assert!(f.successor_bytes().unwrap() > f.to_bytes());
487        assert!(f.predecessor_bytes().unwrap() < f.to_bytes());
488        assert!(!f.is_minimum());
489        assert!(!f.is_maximum());
490
491        assert!(f64::INFINITY.is_maximum());
492        assert!(f64::NEG_INFINITY.is_minimum());
493        assert!(f64::INFINITY.successor_bytes().is_none());
494        assert!(f64::NEG_INFINITY.predecessor_bytes().is_none());
495
496        let nan = f64::NAN;
497        assert!(nan.successor_bytes().is_none());
498        assert!(nan.predecessor_bytes().is_none());
499    }
500
501    #[test]
502    fn test_range_bounds() {
503        let n = 42i64;
504
505        // Test inclusive bounds
506        assert!(n.is_in_range(RangeBound::Included(&40), RangeBound::Included(&45)));
507        assert!(n.is_in_range(RangeBound::Included(&42), RangeBound::Included(&45)));
508        assert!(n.is_in_range(RangeBound::Included(&40), RangeBound::Included(&42)));
509
510        // Test exclusive bounds
511        assert!(n.is_in_range(RangeBound::Excluded(&40), RangeBound::Excluded(&43)));
512        assert!(!n.is_in_range(RangeBound::Excluded(&42), RangeBound::Excluded(&43)));
513
514        // Test mixed bounds
515        assert!(n.is_in_range(RangeBound::Included(&42), RangeBound::Excluded(&43)));
516        assert!(!n.is_in_range(RangeBound::Excluded(&41), RangeBound::Excluded(&42)));
517
518        // Test unbounded
519        assert!(n.is_in_range(RangeBound::Unbounded, RangeBound::Included(&45)));
520        assert!(n.is_in_range(RangeBound::Included(&40), RangeBound::Unbounded));
521        assert!(n.is_in_range(RangeBound::Unbounded, RangeBound::Unbounded));
522    }
523
524    #[test]
525    fn test_literal_i16_collation() {
526        let lit = ast::Literal::I16(100);
527        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
528        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
529        assert!(!lit.is_minimum());
530        assert!(!lit.is_maximum());
531
532        let max_lit = ast::Literal::I16(i16::MAX);
533        let min_lit = ast::Literal::I16(i16::MIN);
534        assert!(max_lit.successor_bytes().is_none());
535        assert!(min_lit.predecessor_bytes().is_none());
536        assert!(max_lit.is_maximum());
537        assert!(min_lit.is_minimum());
538    }
539
540    #[test]
541    fn test_literal_i32_collation() {
542        let lit = ast::Literal::I32(1000);
543        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
544        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
545        assert!(!lit.is_minimum());
546        assert!(!lit.is_maximum());
547
548        let max_lit = ast::Literal::I32(i32::MAX);
549        let min_lit = ast::Literal::I32(i32::MIN);
550        assert!(max_lit.successor_bytes().is_none());
551        assert!(min_lit.predecessor_bytes().is_none());
552        assert!(max_lit.is_maximum());
553        assert!(min_lit.is_minimum());
554    }
555
556    #[test]
557    fn test_literal_entity_id_collation() {
558        use ulid::Ulid;
559        let ulid = Ulid::new();
560        let lit = ast::Literal::EntityId(ulid);
561
562        // Test basic operations
563        assert!(!lit.is_minimum());
564        assert!(!lit.is_maximum());
565
566        // Test minimum ULID (all zeros)
567        let min_ulid = Ulid::from_bytes([0; 16]);
568        let min_lit = ast::Literal::EntityId(min_ulid);
569        assert!(min_lit.is_minimum());
570        assert!(min_lit.predecessor_bytes().is_none());
571
572        // Test maximum ULID (all 255s)
573        let max_ulid = Ulid::from_bytes([255; 16]);
574        let max_lit = ast::Literal::EntityId(max_ulid);
575        assert!(max_lit.is_maximum());
576        assert!(max_lit.successor_bytes().is_none());
577    }
578
579    #[test]
580    fn test_literal_binary_collation() {
581        let lit = ast::Literal::Binary(vec![1, 2, 3]);
582        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
583        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
584        assert!(!lit.is_minimum());
585        assert!(!lit.is_maximum());
586
587        let empty_lit = ast::Literal::Binary(vec![]);
588        assert!(empty_lit.is_minimum());
589        assert!(empty_lit.predecessor_bytes().is_none());
590        assert!(!empty_lit.is_maximum());
591    }
592
593    #[test]
594    fn test_literal_object_collation() {
595        let lit = ast::Literal::Object(vec![10, 20, 30]);
596        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
597        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
598        assert!(!lit.is_minimum());
599        assert!(!lit.is_maximum());
600
601        let empty_lit = ast::Literal::Object(vec![]);
602        assert!(empty_lit.is_minimum());
603        assert!(empty_lit.predecessor_bytes().is_none());
604        assert!(!empty_lit.is_maximum());
605    }
606}