bisection_key/
lexicon_key.rs

1//! Ordered keys that can be inserted between any two of them infinitely.
2//! like **fractional indexes**, but using variable-length string with custom Ord implementation.
3//!
4//! Using a `[0, 64]` charset, total length: 3 + 10 + 26 + 26 = 65
5//!
6//! ```text
7//! +-/0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
8//! ```
9//!
10//! 65 was picked since it's easier to bisect 0~64 at 32, and 0~32 at 16, etc.
11//!
12//! Generated key matches lexiongraphic order.
13
14use std::cmp::{max, Ordering};
15use std::fmt::Display;
16
17const CHARSET: &str = "+-/0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
18
19/// create it like:
20/// ```rust
21/// let _  = bisection_key::LexiconKey::new("a");
22/// ```
23#[derive(Debug)]
24pub struct LexiconKey(Vec<u8>);
25
26impl Default for LexiconKey {
27  fn default() -> Self {
28    Self::new("T").unwrap()
29  }
30}
31
32impl Eq for LexiconKey {}
33
34/// missing length are filled with `32`s, then compare like a vector
35impl PartialEq for LexiconKey {
36  fn eq(&self, other: &LexiconKey) -> bool {
37    let xs = &self.0;
38    let ys = &other.0;
39    let size = max(xs.len(), ys.len());
40    for idx in 0..size {
41      let x = u8_or_neg(xs.get(idx));
42      let y = u8_or_neg(ys.get(idx));
43      if x != y {
44        return false;
45      }
46    }
47    true
48  }
49}
50
51/// missing length are filled with `32`s, then compare like a vector
52impl Ord for LexiconKey {
53  fn cmp(&self, other: &LexiconKey) -> Ordering {
54    let xs = &self.0;
55    let ys = &other.0;
56    let size = max(xs.len(), ys.len());
57    for idx in 0..size {
58      let x = u8_or_neg(xs.get(idx));
59      let y = u8_or_neg(ys.get(idx));
60      match x.cmp(&y) {
61        Ordering::Equal => continue,
62        x => return x,
63      }
64    }
65    Ordering::Equal
66  }
67}
68
69impl PartialOrd for LexiconKey {
70  fn partial_cmp(&self, other: &LexiconKey) -> Option<Ordering> {
71    Some(self.cmp(other))
72  }
73}
74
75impl Display for LexiconKey {
76  fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
77    let mut buf: String = String::new();
78    for i in &self.0 {
79      buf.push(CHARSET.chars().nth(*i as usize).unwrap());
80    }
81    write!(f, "{}", buf)
82  }
83}
84
85impl LexiconKey {
86  pub fn new(s: &str) -> Result<Self, String> {
87    let mut buf: Vec<u8> = vec![];
88    for c in s.chars() {
89      match CHARSET.find(c) {
90        Some(i) => buf.push(i as u8),
91        None => return Err(format!("invalid character for bisection key: {:?}", c)),
92      }
93    }
94    Ok(LexiconKey(buf))
95  }
96
97  pub fn bisect(&self, next: &Self) -> Result<Self, String> {
98    let mut mid: Vec<u8> = vec![];
99
100    // println!("bisecting {:?} and {:?}", self, next);
101
102    let mut change: Option<NumberChange> = None;
103
104    for i in 0..max(self.0.len(), next.0.len()) {
105      let curr = self.0.get(i).or(Some(&0)).unwrap();
106      let edge = next.0.get(i).or(Some(&0)).unwrap();
107
108      let delta = *edge as i8 - *curr as i8;
109
110      // println!(
111      //   "i:{} curr:{} edge:{} delta:{} change:{:?} mid:{:?}",
112      //   i, *curr, *edge, delta, change, mid
113      // );
114
115      match change {
116        None => {
117          if delta == 0 {
118            mid.push(*curr);
119          } else if delta >= 2 || delta <= -2 {
120            mid.push((edge + curr) >> 1);
121            return Self(mid).checked();
122          } else if delta == 1 {
123            change = Some(NumberChange::Increased);
124            mid.push(curr.to_owned());
125          } else if delta == -1 {
126            change = Some(NumberChange::Decreased);
127            mid.push(curr.to_owned());
128          } else {
129            unreachable!("call cases are exhaustive");
130          }
131        }
132        Some(NumberChange::Increased) => {
133          let reach = 64 - *curr;
134          // println!("edge:{} reach:{}", edge, reach);
135          match edge.cmp(&reach) {
136            Ordering::Greater => {
137              if *edge == 1 {
138                // which means current is 64, edge is 1
139                mid.push(0);
140                mid = promote_from(mid, i, NumberChange::Increased)?;
141              } else {
142                mid.push((edge - reach) >> 1);
143                mid = promote_from(mid, i, NumberChange::Increased)?;
144                return Self(mid).checked();
145              }
146            }
147            Ordering::Equal => {
148              mid.push(64);
149              if reach == 0 {
150                // need to bisect again
151              } else {
152                return Self(mid).checked();
153              }
154            }
155            Ordering::Less => {
156              if reach == 1 {
157                mid.push(64);
158              } else {
159                mid.push((*curr + edge + 64) >> 1);
160              }
161              return Self(mid).checked();
162            }
163          }
164        }
165
166        Some(NumberChange::Decreased) => {
167          let reach = 64 - edge;
168          match reach.cmp(curr) {
169            Ordering::Less => {
170              if *curr == 1 {
171                // which means current is 1, edge is 64
172                mid.push(0);
173              } else {
174                mid.push((*curr - reach) >> 1);
175                return Self(mid).checked();
176              }
177            }
178            Ordering::Equal => {
179              mid.push(0);
180              if reach == 0 {
181                // need to bisect again
182              } else {
183                return Self(mid).checked();
184              }
185            }
186            Ordering::Greater => {
187              if reach == 1 {
188                // which means current is 0, edge is 63
189                mid.push(64);
190                mid = promote_from(mid, i, NumberChange::Decreased)?;
191              } else {
192                mid.push((*curr + edge + 64) >> 1);
193              }
194              return Self(mid).checked();
195            }
196          }
197        }
198      }
199    }
200
201    match change {
202      None => Err(format!(
203        "not found property way of generating middle key: {} {}",
204        self, next
205      )),
206      Some(NumberChange::Increased) => {
207        // leave some spaces: 0 1 2 3
208        mid.push(4);
209        Self(mid).checked()
210      }
211      Some(NumberChange::Decreased) => {
212        // leave some spaces: 61 62 63 64
213        let size = mid.len();
214        mid.push(60);
215        mid = promote_from(mid, size, NumberChange::Decreased)?;
216        Self(mid).checked()
217      }
218    }
219  }
220
221  pub fn bisect_end(&self) -> Result<Self, String> {
222    let mut ys: Vec<u8> = vec![];
223    for item in &self.0 {
224      if *item > 64 {
225        return Err(format!("invalid key: {}", self));
226      } else if *item == 64 {
227        ys.push(64);
228      } else if *item == 63 {
229        ys.push(64);
230        return Self(ys).checked();
231      } else if *item == 62 {
232        ys.push(63);
233        return Self(ys).checked();
234      } else {
235        // max 61
236        ys.push(*item + 2);
237        return Self(ys).checked();
238      }
239    }
240    ys.push(32 + 2);
241    Self(ys).checked()
242  }
243
244  pub fn bisect_beginning(&self) -> Result<Self, String> {
245    let mut ys: Vec<u8> = vec![];
246    for item in &self.0 {
247      if *item > 64 {
248        return Err(format!("invalid key: {}", self));
249      } else if *item == 0 {
250        ys.push(0);
251      } else if *item == 1 {
252        ys.push(0);
253        // leave some space here: "61 62 63 64"
254        ys.push(60);
255        return Self(ys).checked();
256      } else if *item == 2 {
257        ys.push(1);
258        return Self(ys).checked();
259      } else {
260        // min 2
261        ys.push(*item - 2);
262        return Self(ys).checked();
263      }
264    }
265    Err(format!(
266      "trailing 0 is invalid during bisect_beggining: {}",
267      self
268    ))
269  }
270
271  pub fn promote_from(&self, idx: usize, change: NumberChange) -> Result<Self, String> {
272    Ok(Self(promote_from(self.0.to_owned(), idx, change)?))
273  }
274
275  pub fn checked(self) -> Result<Self, String> {
276    // println!("checking {:?}", self);
277    // check
278    for i in 0..self.0.len() {
279      if self.0[i] > 64 {
280        return Err(format!(
281          "invalid character for bisection key: {:?}",
282          self.0[i]
283        ));
284      }
285    }
286    Ok(self)
287  }
288}
289
290#[derive(Eq, PartialEq, Debug, Clone, Copy)]
291pub enum NumberChange {
292  Increased,
293  Decreased,
294}
295
296fn promote_from(base: Vec<u8>, origin_idx: usize, change: NumberChange) -> Result<Vec<u8>, String> {
297  if origin_idx == 0 {
298    return Err(format!("cannot promote from 0 of: {:?}", base));
299  }
300  let idx = origin_idx - 1;
301  if idx >= base.len() {
302    Err(format!("index out of range: {:?} {}", base, idx))
303  } else {
304    let mut xs = base.to_owned();
305    let mut pos = idx;
306    loop {
307      if change == NumberChange::Decreased {
308        if xs[pos] == 0 {
309          xs[pos] = 64;
310        } else {
311          xs[pos] -= 1;
312          break Ok(xs);
313        }
314      } else if xs[pos] == 64 {
315        xs[pos] = 0;
316      } else {
317        xs[pos] += 1;
318        break Ok(xs);
319      }
320
321      if pos > 0 {
322        pos -= 1;
323      } else {
324        return Err(format!("not found position to promote: {:?}", base));
325      }
326    }
327  }
328}
329
330/// -1 for None
331fn u8_or_neg(x: Option<&u8>) -> i64 {
332  match x {
333    Some(x) => *x as i64,
334    None => -1,
335  }
336}