peoplesgrocers_lseq/
lib.rs

1use rand::Rng;
2#[cfg(feature = "serde")]
3use serde::{
4    de::{self, Visitor},
5    Deserialize, Serialize,
6};
7use std::error::Error;
8use std::fmt;
9use std::str::FromStr;
10
11const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
12
13/// Minimal RNG trait for LSEQ - only the methods we actually use.
14/// This allows custom implementations (e.g., recording wrappers) without
15/// implementing the full Rng trait.
16pub trait LseqRng {
17    fn random_bool(&mut self, p: f64) -> bool;
18    fn random_range(&mut self, range: std::ops::Range<u64>) -> u64;
19}
20
21/// Blanket implementation for anything that implements rand::Rng
22impl<R: Rng> LseqRng for R {
23    fn random_bool(&mut self, p: f64) -> bool {
24        Rng::random_bool(self, p)
25    }
26    fn random_range(&mut self, range: std::ops::Range<u64>) -> u64 {
27        Rng::random_range(self, range)
28    }
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
32#[cfg_attr(feature = "serde", derive(Serialize))]
33#[cfg_attr(feature = "serde", serde(into = "String"))]
34pub struct SortKey {
35    /// Each element is a value at one level of the LSEQ tree.
36    /// Level n (0-indexed) holds values 0 to 64^(n+1) - 1, capped at 64^8.
37    ///
38    /// String encoding length grows as triangular numbers only up to depth 8:
39    ///   Depths 1-8: 1, 3, 6, 10, 15, 21, 28, 36 chars (triangular)
40    ///   Depths 9+:  44, 52, 60, 68, ... chars (+8 per level, linear)
41    ///
42    /// We cap at 64^8 = 2^48 per level for JavaScript float compatibility (2^53 max).
43    /// But we can still keep going deeper: even at 8 chars per level, the total
44    /// address space is (2^48)^depth which remains astronomically large.
45    numbers: Vec<u64>,
46}
47
48#[cfg(feature = "serde")]
49impl<'de> Deserialize<'de> for SortKey {
50    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
51    where
52        D: serde::Deserializer<'de>,
53    {
54        struct SortKeyVisitor;
55
56        impl<'de> Visitor<'de> for SortKeyVisitor {
57            type Value = SortKey;
58
59            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
60                formatter.write_str("a string containing valid sort key characters")
61            }
62
63            fn visit_str<E>(self, value: &str) -> Result<SortKey, E>
64            where
65                E: de::Error,
66            {
67                value.parse().map_err(|e| E::custom(e))
68            }
69        }
70
71        deserializer.deserialize_str(SortKeyVisitor)
72    }
73}
74
75/// Maximum exponent for level values, capped for JavaScript compatibility.
76/// JavaScript numbers are IEEE 754 floats with 53 bits of precision.
77/// 64^8 = 2^48, which is safely within 2^53.
78const MAX_LEVEL_EXPONENT: u32 = 8;
79
80impl SortKey {
81    pub fn from_numbers(numbers: Vec<u64>) -> Self {
82        SortKey { numbers }
83    }
84
85    /// Returns the maximum value for a given level (0-indexed).
86    /// Level 0: 64^1 - 1 = 63
87    /// Level 1: 64^2 - 1 = 4095
88    /// ...
89    /// Level 7+: 64^8 - 1 (capped for JS compatibility)
90    fn max_value_for_level(level: usize) -> u64 {
91        let exp = (level as u32 + 1).min(MAX_LEVEL_EXPONENT);
92        64u64.pow(exp) - 1
93    }
94
95    /// Returns the number of characters needed to encode a value at this level.
96    fn chars_for_level(level: usize) -> usize {
97        (level + 1).min(MAX_LEVEL_EXPONENT as usize)
98    }
99}
100
101impl From<SortKey> for Vec<u64> {
102    fn from(key: SortKey) -> Vec<u64> {
103        key.numbers
104    }
105}
106
107impl From<SortKey> for String {
108    fn from(key: SortKey) -> String {
109        key.to_string()
110    }
111}
112
113impl AsRef<[u64]> for SortKey {
114    fn as_ref(&self) -> &[u64] {
115        &self.numbers
116    }
117}
118
119impl From<String> for SortKey {
120    fn from(s: String) -> Self {
121        s.parse().unwrap_or_else(|_| SortKey { numbers: vec![0] })
122    }
123}
124
125impl fmt::Display for SortKey {
126    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127        for (level, &value) in self.numbers.iter().enumerate() {
128            let num_chars = Self::chars_for_level(level);
129            let mut chars = Vec::with_capacity(num_chars);
130            let mut v = value;
131
132            // Extract digits from least significant to most significant
133            for _ in 0..num_chars {
134                chars.push(ALPHABET[(v % 64) as usize] as char);
135                v /= 64;
136            }
137
138            // Write in reverse (most significant first)
139            for c in chars.into_iter().rev() {
140                write!(f, "{}", c)?;
141            }
142        }
143        Ok(())
144    }
145}
146
147#[allow(dead_code)]
148#[derive(Debug)]
149pub struct LSEQ<R: LseqRng> {
150    strategies: Vec<bool>,
151    rng: R,
152}
153
154#[allow(dead_code)]
155impl<R: LseqRng> LSEQ<R> {
156    pub fn new(mut rng: R) -> Self {
157        let strategies = vec![rng.random_bool(0.5)];
158        LSEQ { strategies, rng }
159    }
160
161    /// Consume the LSEQ and return the inner RNG
162    pub fn take_rng(self) -> R {
163        self.rng
164    }
165
166    /// Allocate a new sort key between `before` and `after`.
167    ///
168    /// # The invariant
169    ///
170    /// This function guarantees you can always allocate a key that sorts before
171    /// any previously allocated key. This is essential for CRDT list insertion.
172    ///
173    /// # The encoding
174    ///
175    /// Keys use a 64-character alphabet where `'-'` is 0 and `'z'` is 63, chosen
176    /// so that `strcmp` matches numeric comparison. Keys are paths through an
177    /// LSEQ tree where each level has exponentially more space:
178    ///
179    /// ```text
180    /// Level 1:  64¹ values  →  1 char    "-", "0", ..., "z"
181    /// Level 2:  64² values  →  2 chars   "--", "-0", ..., "zz"
182    /// Level 3:  64³ values  →  3 chars   "---", "--0", ..., "zzz"
183    /// ```
184    ///
185    /// A path is encoded by concatenating each level's representation:
186    ///
187    /// ```text
188    /// [0]       = ["-"]              = "-"       (1 char)
189    /// [0, 1]    = ["-", "-0"]        = "--0"     (1 + 2 = 3 chars)
190    /// [0, 0]    = ["-", "--"]        = "---"     (1 + 2 = 3 chars)
191    /// [0, 0, 1] = ["-", "--", "--0"] = "-----0"  (1 + 2 + 3 = 6 chars)
192    /// ```
193    ///
194    /// # Why we go deeper than the LSEQ paper
195    ///
196    /// With `strcmp`, `"-"` == `"---"` == `"------"` in a crucial sense: nothing
197    /// can sort before any of them. All-zeros at any depth is "negative infinity".
198    ///
199    /// The LSEQ paper says: to insert before `[0, 1]` (= `"--0"`), use `[0, 0]`.
200    /// But `[0, 0]` = `"---"`, and nothing can ever sort before that!
201    ///
202    /// This implementation goes one level deeper to preserve the invariant:
203    ///
204    /// ```text
205    /// Insert before "--0" (i.e., [0, 1])?
206    ///   Paper says:  use [0, 0] = "---"           → dead end
207    ///   We say:      use [0, 0, X] = "---" + X    → can still prepend [0, 0, Y] where Y < X
208    /// ```
209    ///
210    /// The cost is longer keys, but we guarantee indefinite prepending.
211    pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
212        let p = before.map_or(vec![], |s| s.numbers.clone());
213        let q = after.map_or(vec![], |s| s.numbers.clone());
214
215        let mut depth = 0;
216        let mut result: Vec<u64> = Vec::new();
217
218        loop {
219            let p_val = p.get(depth).copied().unwrap_or(0);
220            let q_upper = q.get(depth).copied();
221            let level_max = SortKey::max_value_for_level(depth);
222
223            // Minimum allocatable (inclusive): one above the lower bound.
224            // This naturally reserves value 0 when p_val=0, ensuring we never
225            // allocate an all-zeros key. If we allocate [0, 1] and later need
226            // to prepend before it, we simply go deeper to get [0, 0, X].
227            let min_alloc = p_val + 1;
228
229            // Maximum allocatable (inclusive):
230            // - With upper bound: one below it
231            // - Without upper bound (after=None): full range for this level
232            let max_alloc = q_upper.map_or(level_max, |q| q.saturating_sub(1));
233
234            if min_alloc <= max_alloc {
235                let range = max_alloc - min_alloc + 1;
236                let offset = self.rng.random_range(0..range);
237                let new_value = if self.strategies[depth] {
238                    min_alloc + offset
239                } else {
240                    max_alloc - offset
241                };
242                result.push(new_value);
243                return SortKey::from_numbers(result);
244            }
245
246            // Descend to next level
247            result.push(p_val);
248
249            depth += 1;
250            if depth >= self.strategies.len() {
251                self.strategies.push(self.rng.random_bool(0.5));
252            }
253        }
254    }
255}
256
257#[derive(Debug)]
258pub enum SpacingError {
259    TooManyItems,
260}
261
262impl fmt::Display for SpacingError {
263    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
264        match self {
265            SpacingError::TooManyItems => write!(f, "Too many items to allocate"),
266        }
267    }
268}
269
270impl Error for SpacingError {}
271
272#[derive(Debug, Clone)]
273pub struct EvenSpacingIterator {
274    remaining_items: usize,
275    space_size: u64,
276    next_item: u64,
277    step_size_integer: u64, // Integer part of step size
278    step_size_error: f64,   // Fractional part of step size
279    error_accumulator: f64, // Accumulated error
280}
281
282impl EvenSpacingIterator {
283    // Static table of (64^k - 1) values for k from 1 to 8
284    // We subtract 1 because we reserve only the lower boundary (position 0, all "-"s).
285    // Position 0 cannot be used because nothing can be lexicographically less than it.
286    // The upper boundary (all "z"s) IS usable because we can always insert after it
287    // by extending: "zzz" < "zzza" lexicographically (prefix comparison).
288    // Capped at 64^8 = 2^48 for JavaScript number compatibility (max safe: 2^53).
289    const USABLE_SPACE: [usize; 8] = [
290        64 - 1,              // 64^1 - 1
291        4096 - 1,            // 64^2 - 1
292        262144 - 1,          // 64^3 - 1
293        16777216 - 1,        // 64^4 - 1
294        1073741824 - 1,      // 64^5 - 1
295        68719476736 - 1,     // 64^6 - 1
296        4398046511104 - 1,   // 64^7 - 1
297        281474976710656 - 1, // 64^8 - 1
298    ];
299
300    pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
301        if total_items == 0 {
302            return Err(SpacingError::TooManyItems);
303        }
304
305        // Find the smallest k where 64^k > total_items using the static table
306        let mut k = 0;
307        let mut space_size = 0;
308
309        for (index, &size) in Self::USABLE_SPACE.iter().enumerate() {
310            if size >= total_items {
311                k = index as u64 + 1; // k is 1-indexed
312                space_size = size;
313                break;
314            }
315        }
316
317        // If we couldn't find a suitable k, the request is too large
318        if k == 0 {
319            return Err(SpacingError::TooManyItems);
320        }
321
322        // Calculate step size split into integer and fractional parts
323        let step_size = (space_size as f64) / (total_items as f64);
324        let step_size_integer = step_size.floor() as u64;
325        let step_size_error = step_size - step_size_integer as f64;
326
327        Ok((
328            k,
329            EvenSpacingIterator {
330                remaining_items: total_items,
331                space_size: space_size.try_into().unwrap(),
332                next_item: 1,
333                step_size_integer,
334                step_size_error,
335                error_accumulator: 0.0,
336            },
337        ))
338    }
339
340    /// Convert a position within a level-k space to a SortKey.
341    ///
342    /// Creates a k-level key where levels 0 through k-2 are 0, and level k-1
343    /// contains the position value.
344    ///
345    /// Example: position_to_key(2, 1) = [0, 1] which displays as "--0"
346    pub fn position_to_key(k: u64, position: u64) -> SortKey {
347        let mut result = Vec::with_capacity(k as usize);
348
349        // Levels 0 through k-2 are 0
350        for _ in 0..k.saturating_sub(1) {
351            result.push(0);
352        }
353
354        // Level k-1 contains the position
355        if k > 0 {
356            result.push(position);
357        }
358
359        SortKey::from_numbers(result)
360    }
361}
362
363impl Iterator for EvenSpacingIterator {
364    type Item = u64;
365
366    fn next(&mut self) -> Option<Self::Item> {
367        if self.remaining_items == 0 {
368            return None;
369        }
370
371        if self.next_item > self.space_size {
372            return None;
373        }
374
375        let current_position = self.next_item;
376        self.remaining_items -= 1;
377
378        self.next_item += self.step_size_integer;
379
380        self.error_accumulator += self.step_size_error;
381        if self.error_accumulator >= 1.0 {
382            self.next_item += 1;
383            self.error_accumulator -= 1.0;
384        }
385
386        Some(current_position)
387    }
388}
389
390#[derive(Debug)]
391pub enum SortKeyParseError {
392    InvalidCharacter(char),
393    InvalidLength(usize),
394}
395
396impl fmt::Display for SortKeyParseError {
397    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
398        match self {
399            SortKeyParseError::InvalidCharacter(c) => write!(
400                f,
401                "Invalid character '{}' in sort key. Expected characters from alphabet: {}",
402                c,
403                String::from_utf8_lossy(ALPHABET)
404            ),
405            SortKeyParseError::InvalidLength(len) => write!(
406                f,
407                "Invalid sort key length {}. Expected triangular number up to 36 (1, 3, 6, 10, 15, 21, 28, 36), then +8 per level (44, 52, 60, ...)",
408                len
409            ),
410        }
411    }
412}
413
414impl Error for SortKeyParseError {}
415
416impl FromStr for SortKey {
417    type Err = SortKeyParseError;
418
419    fn from_str(s: &str) -> Result<Self, Self::Err> {
420        let bytes = s.as_bytes();
421        let mut numbers = Vec::new();
422        let mut pos = 0;
423        let mut level = 0;
424
425        while pos < bytes.len() {
426            let num_chars = SortKey::chars_for_level(level);
427            if pos + num_chars > bytes.len() {
428                return Err(SortKeyParseError::InvalidLength(bytes.len()));
429            }
430
431            // Parse num_chars characters as a base-64 number
432            let mut value: u64 = 0;
433            for i in 0..num_chars {
434                let b = bytes[pos + i];
435                let digit = ALPHABET
436                    .iter()
437                    .position(|&x| x == b)
438                    .ok_or(SortKeyParseError::InvalidCharacter(b as char))?;
439                value = value * 64 + digit as u64;
440            }
441
442            numbers.push(value);
443            pos += num_chars;
444            level += 1;
445        }
446
447        Ok(SortKey { numbers })
448    }
449}
450
451#[cfg(test)]
452mod tests {
453    use super::*;
454    use rand::rngs::StdRng;
455    use rand::SeedableRng;
456
457    /// Helper to create a SortKey from a slice of numbers
458    fn key(nums: &[u64]) -> SortKey {
459        SortKey::from_numbers(nums.to_vec())
460    }
461
462    #[test]
463    fn test_compare_lseq() {
464        // Single-character keys are level-0 values (0-63)
465        // "a" is position 38 in alphabet, "b" is 39
466        let a = "-".parse::<SortKey>().unwrap(); // value 0
467        let b = "0".parse::<SortKey>().unwrap(); // value 1
468        assert!(a < b);
469        assert!(!(b < a));
470        assert!(!(a < a));
471    }
472
473    #[test]
474    fn test_lseq_alloc() {
475        let rng = StdRng::seed_from_u64(42); // Deterministic RNG for testing
476        let mut lseq = LSEQ::new(rng);
477        let id1 = lseq.alloc(None, None);
478        let id2 = lseq.alloc(Some(&id1), None);
479        let id3 = lseq.alloc(Some(&id1), Some(&id2));
480
481        assert!(id1 < id2);
482        assert!(id1 < id3);
483        assert!(id3 < id2);
484    }
485
486    #[test]
487    fn test_position_to_key() {
488        // k=2 means 2 levels: level 0 (1 char) + level 1 (2 chars) = 3 chars total
489        // position 1 goes into level 1, with level 0 = 0
490        // [0, 1] = "-" + "-0" = "--0"
491        const K: u64 = 2;
492        assert_eq!(
493            EvenSpacingIterator::position_to_key(K, 1).to_string(),
494            "--0"
495        );
496
497        // k=1 means just level 0 (1 char)
498        // position 1 = "0" (alphabet[1])
499        assert_eq!(
500            EvenSpacingIterator::position_to_key(1, 1).to_string(),
501            "0"
502        );
503
504        // k=2, position 4095 (max for level 1 = 64² - 1)
505        // [0, 4095] = "-" + "zz" = "-zz"
506        assert_eq!(
507            EvenSpacingIterator::position_to_key(2, 4095).to_string(),
508            "-zz"
509        );
510    }
511
512    #[test]
513    fn test_even_spacing_4093() {
514        let (k, mut iter) = EvenSpacingIterator::new(4093).unwrap();
515        assert_eq!(k, 2);
516        let mut positions = Vec::new();
517        for pos in iter.by_ref() {
518            // Use by_ref() to borrow instead of consume
519            positions.push(pos);
520        }
521
522        // Print all generated sort keys
523        //println!("\nGenerated sort keys for 62 positions:");
524        //for (i, pos) in positions.iter().enumerate() {
525        //    let key = EvenSpacingIterator::position_to_key(k, *pos);
526        //    println!("Position {}: {} (numeric: {})", i, key, pos);
527        //}
528        println!("{:?}", iter);
529
530        assert_eq!(positions.len(), 4093);
531    }
532
533    #[test]
534    fn test_even_spacing_6() {
535        let (k, mut iter) = EvenSpacingIterator::new(6).unwrap();
536        eprintln!("Created iterator with k={}", k);
537        let mut positions = Vec::new();
538        let mut count = 0;
539        while let Some(pos) = iter.next() {
540            count += 1;
541            eprintln!("Iteration {}: Got position {}", count, pos);
542            positions.push(pos);
543        }
544        eprintln!("Final iterator state: {:?}", iter);
545        assert_eq!(
546            positions.len(),
547            6,
548            "Expected 6 positions, got {}",
549            positions.len()
550        );
551    }
552
553    /// Test the "go deeper" strategy for prepending before left-edge keys
554    #[test]
555    fn test_prepend_before_left_edge() {
556        let rng = StdRng::seed_from_u64(123);
557        let mut lseq = LSEQ::new(rng);
558
559        // Prepend before [0, 1] -> should get [0, 0, X]
560        let target = key(&[0, 1]);
561        let result = lseq.alloc(None, Some(&target));
562        assert!(result < target);
563        assert_eq!(result.numbers.len(), 3);
564        assert_eq!(result.numbers[0], 0);
565        assert_eq!(result.numbers[1], 0);
566
567        // Prepend before [0, 0, 1] -> should get [0, 0, 0, X]
568        let target = key(&[0, 0, 1]);
569        let result = lseq.alloc(None, Some(&target));
570        assert!(result < target);
571        assert_eq!(result.numbers.len(), 4);
572
573        // Prepend before [0, 0, 0, 1] -> should get [0, 0, 0, 0, X]
574        let target = key(&[0, 0, 0, 1]);
575        let result = lseq.alloc(None, Some(&target));
576        assert!(result < target);
577        assert_eq!(result.numbers.len(), 5);
578    }
579
580    /// Verify the ordering: [0, 0, X] < [0, 1] for any X
581    #[test]
582    fn test_left_edge_ordering() {
583        assert!(key(&[0, 0, 63]) < key(&[0, 1]));
584        assert!(key(&[0, 0, 1]) < key(&[0, 1]));
585        assert!(key(&[0, 0, 0, 63]) < key(&[0, 0, 1]));
586    }
587
588    /// Verify roundtrip: display -> parse -> display gives same result
589    #[test]
590    fn test_roundtrip_encoding() {
591        let cases = vec![
592            key(&[0]),                    // "-"
593            key(&[63]),                   // "z"
594            key(&[0, 0]),                 // "---"
595            key(&[0, 1]),                 // "--0"
596            key(&[0, 4095]),              // "-zz"
597            key(&[1, 0]),                 // "0--"
598            key(&[0, 0, 0]),              // "------"
599            key(&[0, 0, 1]),              // "-----0"
600            key(&[0, 0, 262143]),         // "---zzz"
601        ];
602
603        for original in cases {
604            let s = original.to_string();
605            let parsed: SortKey = s.parse().expect(&format!("Failed to parse '{}'", s));
606            assert_eq!(
607                original.numbers, parsed.numbers,
608                "Roundtrip failed for {:?} -> '{}' -> {:?}",
609                original.numbers, s, parsed.numbers
610            );
611        }
612    }
613
614    /// Verify string encoding matches expected format
615    #[test]
616    fn test_string_encoding() {
617        // Level 0 only (1 char)
618        assert_eq!(key(&[0]).to_string(), "-");
619        assert_eq!(key(&[1]).to_string(), "0");
620        assert_eq!(key(&[63]).to_string(), "z");
621
622        // Level 0 + Level 1 (1 + 2 = 3 chars)
623        assert_eq!(key(&[0, 0]).to_string(), "---");
624        assert_eq!(key(&[0, 1]).to_string(), "--0");
625        assert_eq!(key(&[0, 64]).to_string(), "-0-"); // 64 = 1*64 + 0
626        assert_eq!(key(&[0, 4095]).to_string(), "-zz");
627
628        // Level 0 + Level 1 + Level 2 (1 + 2 + 3 = 6 chars)
629        assert_eq!(key(&[0, 0, 0]).to_string(), "------");
630        assert_eq!(key(&[0, 0, 1]).to_string(), "-----0");
631        assert_eq!(key(&[0, 0, 262143]).to_string(), "---zzz");
632    }
633}