ankurah_core/
collation.rs

1use std::cmp::Ordering;
2
3use ankql::ast;
4use ankurah_proto::EntityId;
5/// Represents a bound in a range query
6#[derive(Debug, Clone, PartialEq)]
7pub enum RangeBound<T> {
8    Included(T),
9    Excluded(T),
10    Unbounded,
11}
12
13/// Trait for types that support collation operations
14pub trait Collatable {
15    /// Convert the value to its binary representation for collation
16    fn to_bytes(&self) -> Vec<u8>;
17
18    /// Returns the immediate successor's binary representation if one exists
19    fn successor_bytes(&self) -> Option<Vec<u8>>;
20
21    /// Returns the immediate predecessor's binary representation if one exists
22    fn predecessor_bytes(&self) -> Option<Vec<u8>>;
23
24    /// Returns true if this value represents a minimum bound in its domain
25    fn is_minimum(&self) -> bool;
26
27    /// Returns true if this value represents a maximum bound in its domain
28    fn is_maximum(&self) -> bool;
29
30    /// Compare two values in the collation order
31    fn compare(&self, other: &Self) -> Ordering { self.to_bytes().cmp(&other.to_bytes()) }
32
33    fn is_in_range(&self, lower: RangeBound<&Self>, upper: RangeBound<&Self>) -> bool {
34        match (lower, upper) {
35            (RangeBound::Included(l), RangeBound::Included(u)) => self.compare(l) != Ordering::Less && self.compare(u) != Ordering::Greater,
36            (RangeBound::Included(l), RangeBound::Excluded(u)) => self.compare(l) != Ordering::Less && self.compare(u) == Ordering::Less,
37            (RangeBound::Excluded(l), RangeBound::Included(u)) => {
38                self.compare(l) == Ordering::Greater && self.compare(u) != Ordering::Greater
39            }
40            (RangeBound::Excluded(l), RangeBound::Excluded(u)) => self.compare(l) == Ordering::Greater && self.compare(u) == Ordering::Less,
41            (RangeBound::Unbounded, RangeBound::Included(u)) => self.compare(u) != Ordering::Greater,
42            (RangeBound::Unbounded, RangeBound::Excluded(u)) => self.compare(u) == Ordering::Less,
43            (RangeBound::Included(l), RangeBound::Unbounded) => self.compare(l) != Ordering::Less,
44            (RangeBound::Excluded(l), RangeBound::Unbounded) => self.compare(l) == Ordering::Greater,
45            (RangeBound::Unbounded, RangeBound::Unbounded) => true,
46        }
47    }
48}
49
50impl Collatable for ast::Literal {
51    fn to_bytes(&self) -> Vec<u8> {
52        match self {
53            ast::Literal::String(s) => s.as_bytes().to_vec(),
54            ast::Literal::I16(i) => i.to_be_bytes().to_vec(),
55            ast::Literal::I32(i) => i.to_be_bytes().to_vec(),
56            ast::Literal::I64(i) => i.to_be_bytes().to_vec(),
57            ast::Literal::F64(f) => {
58                let bits = if f.is_nan() {
59                    u64::MAX // NaN sorts last
60                } else {
61                    let bits = f.to_bits();
62                    if *f >= 0.0 {
63                        bits ^ (1 << 63) // Flip sign bit for positive numbers
64                    } else {
65                        !bits // Flip all bits for negative numbers
66                    }
67                };
68                bits.to_be_bytes().to_vec()
69            }
70            ast::Literal::Bool(b) => vec![*b as u8],
71            ast::Literal::EntityId(ulid) => ulid.to_bytes().to_vec(),
72            ast::Literal::Object(bytes) => bytes.clone(),
73            ast::Literal::Binary(bytes) => bytes.clone(),
74        }
75    }
76
77    fn successor_bytes(&self) -> Option<Vec<u8>> {
78        match self {
79            ast::Literal::String(s) => {
80                if s.is_empty() {
81                    let mut bytes = s.as_bytes().to_vec();
82                    // TODO - I think this is wrong. We shouldn't just push a byte. We should increment by one bit perhaps?
83                    // It also occurs to me that we need a fixed length for strings in order collate properly.
84                    bytes.push(0);
85                    Some(bytes)
86                } else {
87                    let mut bytes = s.as_bytes().to_vec();
88                    // TODO - I think this is wrong
89                    bytes.push(0);
90                    Some(bytes)
91                }
92            }
93            ast::Literal::I16(i) => {
94                if *i == i16::MAX {
95                    None
96                } else {
97                    Some((i + 1).to_be_bytes().to_vec())
98                }
99            }
100            ast::Literal::I32(i) => {
101                if *i == i32::MAX {
102                    None
103                } else {
104                    Some((i + 1).to_be_bytes().to_vec())
105                }
106            }
107            ast::Literal::I64(i) => {
108                if *i == i64::MAX {
109                    None
110                } else {
111                    Some((i + 1).to_be_bytes().to_vec())
112                }
113            }
114            ast::Literal::F64(f) => {
115                if f.is_nan() || (f.is_infinite() && *f > 0.0) {
116                    None
117                } else {
118                    let bits = if *f >= 0.0 { f.to_bits() ^ (1 << 63) } else { !f.to_bits() };
119                    let next_bits = bits + 1;
120                    Some(next_bits.to_be_bytes().to_vec())
121                }
122            }
123            ast::Literal::Bool(b) => {
124                if !b {
125                    None
126                } else {
127                    Some(vec![1])
128                }
129            }
130            ast::Literal::EntityId(ulid) => {
131                let mut bytes = ulid.to_bytes();
132                // Find the rightmost byte that can be incremented
133                for i in (0..bytes.len()).rev() {
134                    if bytes[i] < 255 {
135                        bytes[i] += 1;
136                        // Zero out all bytes to the right
137                        for j in (i + 1)..bytes.len() {
138                            bytes[j] = 0;
139                        }
140                        return Some(bytes.to_vec());
141                    }
142                }
143                None // All bytes are 255, no successor
144            }
145            ast::Literal::Object(bytes) | ast::Literal::Binary(bytes) => {
146                let mut bytes = bytes.clone();
147                // Find the rightmost byte that can be incremented
148                for i in (0..bytes.len()).rev() {
149                    if bytes[i] < 255 {
150                        bytes[i] += 1;
151                        // Zero out all bytes to the right
152                        for j in (i + 1)..bytes.len() {
153                            bytes[j] = 0;
154                        }
155                        return Some(bytes);
156                    }
157                }
158                // All bytes are 255, append a zero byte
159                bytes.push(0);
160                Some(bytes)
161            }
162        }
163    }
164
165    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
166        match self {
167            ast::Literal::String(s) => {
168                if s.is_empty() {
169                    None
170                } else {
171                    let bytes = s.as_bytes();
172                    Some(bytes[..bytes.len() - 1].to_vec())
173                }
174            }
175            ast::Literal::I16(i) => {
176                if *i == i16::MIN {
177                    None
178                } else {
179                    Some((i - 1).to_be_bytes().to_vec())
180                }
181            }
182            ast::Literal::I32(i) => {
183                if *i == i32::MIN {
184                    None
185                } else {
186                    Some((i - 1).to_be_bytes().to_vec())
187                }
188            }
189            ast::Literal::I64(i) => {
190                if *i == i64::MIN {
191                    None
192                } else {
193                    Some((i - 1).to_be_bytes().to_vec())
194                }
195            }
196            ast::Literal::F64(f) => {
197                if f.is_nan() || (f.is_infinite() && *f < 0.0) {
198                    None
199                } else {
200                    let bits = if *f >= 0.0 { f.to_bits() ^ (1 << 63) } else { !f.to_bits() };
201                    let prev_bits = bits - 1;
202                    Some(prev_bits.to_be_bytes().to_vec())
203                }
204            }
205            ast::Literal::Bool(b) => {
206                if *b {
207                    Some(vec![0])
208                } else {
209                    None
210                }
211            }
212            ast::Literal::EntityId(ulid) => {
213                let mut bytes = ulid.to_bytes();
214                // Find the rightmost byte that can be decremented
215                for i in (0..bytes.len()).rev() {
216                    if bytes[i] > 0 {
217                        bytes[i] -= 1;
218                        // Set all bytes to the right to 255
219                        for j in (i + 1)..bytes.len() {
220                            bytes[j] = 255;
221                        }
222                        return Some(bytes.to_vec());
223                    }
224                }
225                None // All bytes are 0, no predecessor
226            }
227            ast::Literal::Object(bytes) | ast::Literal::Binary(bytes) => {
228                if bytes.is_empty() {
229                    None
230                } else {
231                    let mut bytes = bytes.clone();
232                    // Find the rightmost byte that can be decremented
233                    for i in (0..bytes.len()).rev() {
234                        if bytes[i] > 0 {
235                            bytes[i] -= 1;
236                            // Set all bytes to the right to 255
237                            for j in (i + 1)..bytes.len() {
238                                bytes[j] = 255;
239                            }
240                            return Some(bytes);
241                        }
242                    }
243                    // All bytes are 0, remove the last byte
244                    if bytes.len() > 1 {
245                        bytes.pop();
246                        Some(bytes)
247                    } else {
248                        None
249                    }
250                }
251            }
252        }
253    }
254
255    fn is_minimum(&self) -> bool {
256        match self {
257            ast::Literal::String(s) => s.is_empty(),
258            ast::Literal::I16(i) => *i == i16::MIN,
259            ast::Literal::I32(i) => *i == i32::MIN,
260            ast::Literal::I64(i) => *i == i64::MIN,
261            ast::Literal::F64(f) => *f == f64::NEG_INFINITY,
262            ast::Literal::Bool(b) => !b,
263            ast::Literal::EntityId(ulid) => ulid.to_bytes().iter().all(|&b| b == 0),
264            ast::Literal::Object(bytes) | ast::Literal::Binary(bytes) => bytes.is_empty(),
265        }
266    }
267
268    fn is_maximum(&self) -> bool {
269        match self {
270            ast::Literal::String(_) => false, // Strings have no theoretical maximum
271            ast::Literal::I16(i) => *i == i16::MAX,
272            ast::Literal::I32(i) => *i == i32::MAX,
273            ast::Literal::I64(i) => *i == i64::MAX,
274            ast::Literal::F64(f) => *f == f64::INFINITY,
275            ast::Literal::Bool(b) => *b,
276            ast::Literal::EntityId(ulid) => ulid.to_bytes().iter().all(|&b| b == 255),
277            ast::Literal::Object(_) | ast::Literal::Binary(_) => false, // No theoretical maximum
278        }
279    }
280}
281
282// // Implementation for strings
283impl Collatable for &str {
284    fn to_bytes(&self) -> Vec<u8> { self.as_bytes().to_vec() }
285
286    fn successor_bytes(&self) -> Option<Vec<u8>> {
287        if self.is_maximum() {
288            None
289        } else {
290            let mut bytes = self.as_bytes().to_vec();
291            bytes.push(0);
292            Some(bytes)
293        }
294    }
295
296    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
297        if self.is_minimum() {
298            None
299        } else {
300            let bytes = self.as_bytes();
301            if bytes.is_empty() {
302                None
303            } else {
304                Some(bytes[..bytes.len() - 1].to_vec())
305            }
306        }
307    }
308
309    fn is_minimum(&self) -> bool { self.is_empty() }
310
311    fn is_maximum(&self) -> bool {
312        false // Strings have no theoretical maximum
313    }
314}
315
316// Implementation for integers
317impl Collatable for i64 {
318    fn to_bytes(&self) -> Vec<u8> {
319        // Use big-endian encoding to preserve ordering
320        self.to_be_bytes().to_vec()
321    }
322
323    fn successor_bytes(&self) -> Option<Vec<u8>> {
324        if self == &i64::MAX {
325            None
326        } else {
327            Some((self + 1).to_be_bytes().to_vec())
328        }
329    }
330
331    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
332        if self == &i64::MIN {
333            None
334        } else {
335            Some((self - 1).to_be_bytes().to_vec())
336        }
337    }
338
339    fn is_minimum(&self) -> bool { *self == i64::MIN }
340
341    fn is_maximum(&self) -> bool { *self == i64::MAX }
342}
343
344// Implementation for floats
345impl Collatable for f64 {
346    fn to_bytes(&self) -> Vec<u8> {
347        let bits = if self.is_nan() {
348            u64::MAX // NaN sorts last
349        } else {
350            let bits = self.to_bits();
351            if *self >= 0.0 {
352                bits ^ (1 << 63) // Flip sign bit for positive numbers
353            } else {
354                !bits // Flip all bits for negative numbers
355            }
356        };
357        bits.to_be_bytes().to_vec()
358    }
359
360    fn successor_bytes(&self) -> Option<Vec<u8>> {
361        if self.is_nan() || (self.is_infinite() && *self > 0.0) {
362            None
363        } else {
364            let bits = if *self >= 0.0 {
365                self.to_bits() ^ (1 << 63) // Apply same sign bit flip as to_bytes
366            } else {
367                !self.to_bits() // Apply same bit inversion as to_bytes
368            };
369            let next_bits = bits + 1;
370            Some(next_bits.to_be_bytes().to_vec())
371        }
372    }
373
374    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
375        if self.is_nan() || (self.is_infinite() && *self < 0.0) {
376            None
377        } else {
378            let bits = if *self >= 0.0 {
379                self.to_bits() ^ (1 << 63) // Apply same sign bit flip as to_bytes
380            } else {
381                !self.to_bits() // Apply same bit inversion as to_bytes
382            };
383            let prev_bits = bits - 1;
384            Some(prev_bits.to_be_bytes().to_vec())
385        }
386    }
387
388    fn is_minimum(&self) -> bool { *self == f64::NEG_INFINITY }
389
390    fn is_maximum(&self) -> bool { *self == f64::INFINITY }
391}
392
393// Implementation for EntityId (ULIDs have natural lexicographic ordering)
394impl Collatable for EntityId {
395    fn to_bytes(&self) -> Vec<u8> {
396        // EntityId is based on ULID which has lexicographic ordering
397        self.to_bytes().to_vec()
398    }
399
400    fn successor_bytes(&self) -> Option<Vec<u8>> {
401        if self.is_maximum() {
402            None
403        } else {
404            let mut bytes = self.to_bytes();
405            // Find the rightmost byte that can be incremented
406            for i in (0..bytes.len()).rev() {
407                if bytes[i] < 255 {
408                    bytes[i] += 1;
409                    // Zero out all bytes to the right
410                    for j in (i + 1)..bytes.len() {
411                        bytes[j] = 0;
412                    }
413                    return Some(bytes.to_vec());
414                }
415            }
416            None // All bytes are 255, no successor
417        }
418    }
419
420    fn predecessor_bytes(&self) -> Option<Vec<u8>> {
421        if self.is_minimum() {
422            None
423        } else {
424            let mut bytes = self.to_bytes();
425            // Find the rightmost byte that can be decremented
426            for i in (0..bytes.len()).rev() {
427                if bytes[i] > 0 {
428                    bytes[i] -= 1;
429                    // Set all bytes to the right to 255
430                    for j in (i + 1)..bytes.len() {
431                        bytes[j] = 255;
432                    }
433                    return Some(bytes.to_vec());
434                }
435            }
436            None // All bytes are 0, no predecessor
437        }
438    }
439
440    fn is_minimum(&self) -> bool { self.to_bytes().iter().all(|&b| b == 0) }
441
442    fn is_maximum(&self) -> bool { self.to_bytes().iter().all(|&b| b == 255) }
443}
444
445#[cfg(test)]
446mod tests {
447    use super::*;
448
449    #[test]
450    fn test_string_collation() {
451        let s = "hello";
452        assert!(s.successor_bytes().unwrap() > s.to_bytes());
453        assert!(s.predecessor_bytes().unwrap() < s.to_bytes());
454        assert!(!s.is_minimum());
455        assert!(!s.is_maximum());
456
457        let empty = "";
458        assert!(empty.is_minimum());
459        assert!(empty.predecessor_bytes().is_none());
460    }
461
462    #[test]
463    fn test_integer_collation() {
464        let n = 42i64;
465        assert_eq!(i64::from_be_bytes(n.successor_bytes().unwrap().try_into().unwrap()), 43);
466        assert_eq!(i64::from_be_bytes(n.predecessor_bytes().unwrap().try_into().unwrap()), 41);
467        assert!(!n.is_minimum());
468        assert!(!n.is_maximum());
469
470        assert!(i64::MAX.successor_bytes().is_none());
471        assert!(i64::MIN.predecessor_bytes().is_none());
472        assert!(i64::MAX.is_maximum());
473        assert!(i64::MIN.is_minimum());
474    }
475
476    #[test]
477    fn test_float_collation() {
478        let f = 1.0f64;
479        assert!(f.successor_bytes().unwrap() > f.to_bytes());
480        assert!(f.predecessor_bytes().unwrap() < f.to_bytes());
481        assert!(!f.is_minimum());
482        assert!(!f.is_maximum());
483
484        assert!(f64::INFINITY.is_maximum());
485        assert!(f64::NEG_INFINITY.is_minimum());
486        assert!(f64::INFINITY.successor_bytes().is_none());
487        assert!(f64::NEG_INFINITY.predecessor_bytes().is_none());
488
489        let nan = f64::NAN;
490        assert!(nan.successor_bytes().is_none());
491        assert!(nan.predecessor_bytes().is_none());
492    }
493
494    #[test]
495    fn test_range_bounds() {
496        let n = 42i64;
497
498        // Test inclusive bounds
499        assert!(n.is_in_range(RangeBound::Included(&40), RangeBound::Included(&45)));
500        assert!(n.is_in_range(RangeBound::Included(&42), RangeBound::Included(&45)));
501        assert!(n.is_in_range(RangeBound::Included(&40), RangeBound::Included(&42)));
502
503        // Test exclusive bounds
504        assert!(n.is_in_range(RangeBound::Excluded(&40), RangeBound::Excluded(&43)));
505        assert!(!n.is_in_range(RangeBound::Excluded(&42), RangeBound::Excluded(&43)));
506
507        // Test mixed bounds
508        assert!(n.is_in_range(RangeBound::Included(&42), RangeBound::Excluded(&43)));
509        assert!(!n.is_in_range(RangeBound::Excluded(&41), RangeBound::Excluded(&42)));
510
511        // Test unbounded
512        assert!(n.is_in_range(RangeBound::Unbounded, RangeBound::Included(&45)));
513        assert!(n.is_in_range(RangeBound::Included(&40), RangeBound::Unbounded));
514        assert!(n.is_in_range(RangeBound::Unbounded, RangeBound::Unbounded));
515    }
516
517    #[test]
518    fn test_literal_i16_collation() {
519        let lit = ast::Literal::I16(100);
520        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
521        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
522        assert!(!lit.is_minimum());
523        assert!(!lit.is_maximum());
524
525        let max_lit = ast::Literal::I16(i16::MAX);
526        let min_lit = ast::Literal::I16(i16::MIN);
527        assert!(max_lit.successor_bytes().is_none());
528        assert!(min_lit.predecessor_bytes().is_none());
529        assert!(max_lit.is_maximum());
530        assert!(min_lit.is_minimum());
531    }
532
533    #[test]
534    fn test_literal_i32_collation() {
535        let lit = ast::Literal::I32(1000);
536        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
537        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
538        assert!(!lit.is_minimum());
539        assert!(!lit.is_maximum());
540
541        let max_lit = ast::Literal::I32(i32::MAX);
542        let min_lit = ast::Literal::I32(i32::MIN);
543        assert!(max_lit.successor_bytes().is_none());
544        assert!(min_lit.predecessor_bytes().is_none());
545        assert!(max_lit.is_maximum());
546        assert!(min_lit.is_minimum());
547    }
548
549    #[test]
550    fn test_literal_entity_id_collation() {
551        use ulid::Ulid;
552        let ulid = Ulid::new();
553        let lit = ast::Literal::EntityId(ulid);
554
555        // Test basic operations
556        assert!(!lit.is_minimum());
557        assert!(!lit.is_maximum());
558
559        // Test minimum ULID (all zeros)
560        let min_ulid = Ulid::from_bytes([0; 16]);
561        let min_lit = ast::Literal::EntityId(min_ulid);
562        assert!(min_lit.is_minimum());
563        assert!(min_lit.predecessor_bytes().is_none());
564
565        // Test maximum ULID (all 255s)
566        let max_ulid = Ulid::from_bytes([255; 16]);
567        let max_lit = ast::Literal::EntityId(max_ulid);
568        assert!(max_lit.is_maximum());
569        assert!(max_lit.successor_bytes().is_none());
570    }
571
572    #[test]
573    fn test_literal_binary_collation() {
574        let lit = ast::Literal::Binary(vec![1, 2, 3]);
575        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
576        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
577        assert!(!lit.is_minimum());
578        assert!(!lit.is_maximum());
579
580        let empty_lit = ast::Literal::Binary(vec![]);
581        assert!(empty_lit.is_minimum());
582        assert!(empty_lit.predecessor_bytes().is_none());
583        assert!(!empty_lit.is_maximum());
584    }
585
586    #[test]
587    fn test_literal_object_collation() {
588        let lit = ast::Literal::Object(vec![10, 20, 30]);
589        assert!(lit.successor_bytes().unwrap() > lit.to_bytes());
590        assert!(lit.predecessor_bytes().unwrap() < lit.to_bytes());
591        assert!(!lit.is_minimum());
592        assert!(!lit.is_maximum());
593
594        let empty_lit = ast::Literal::Object(vec![]);
595        assert!(empty_lit.is_minimum());
596        assert!(empty_lit.predecessor_bytes().is_none());
597        assert!(!empty_lit.is_maximum());
598    }
599}