bstree_file_readonly/
lib.rs

1//! Library implementing a read-only  binary search tree stored in a file.
2//!  
3//! Entries are tuples made of one identifier plus one value.
4//! Searches are performed on the values: they typically are values stored in a table column while
5//! identifiers are rows identifiers (e.g. simple row indices).
6//!
7//! Node size should be such that it fits into L1 data cache.
8//! Typical values (for cache core):
9//! * L1 cache: 32 kB
10//! * L2 cache: 256 kB
11//! * L3 cache: 1 MB (6 MB shared between 4 cores)
12//! * HDD cache: 8 MB
13//! I.e: L2 = 8 * L1; L3 = 4 * L2 = 32 * L1
14//! I.e: HDD = 256 * L1
15//!
16//! In DBMS, the page size is <= L1 cache size
17//!
18//! If designed for HDD, we want to avoid random access (5ms seek time):
19//! * we thus prefer to load large parts of the file in RAM
20//!     - we favor a single root node (kept in cache), and an array of leaf nodes
21//! * each node stored on the disk must be divided into sub-nodes no larger than the L1 cache capacity
22//!
23//! If you are looking for a index that can be modified (add and remove key/value pair), you should
24//! have a look at [sled](https://sled.rs/) and its [git repo](https://github.com/spacejam/sled).
25//! They cite [this paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/bw-tree-icde2013-final.pdf)
26//! I should definitely have a look at it!!
27// L1Node<E> = simple &[E] of size nL1 such that size_of<E> * nL1 < 90% of L1 cache size
28// (nL1InL3 - 1) * size_of<E> (1 + nL1) < L3 cache size
29// L3Node = &[E] of size (nL1InL3 - 1) + nL1InL3 * L1Node<E>
30// HDDNode = &[E] of size (nL3InHDDN - 1) + nL3InHDDN * L3Node
31
32// We recall that: 2^0 + 2^1 + 2^2 + ... + 2^n = 2^(n+1) - 1 = size of a sub-tree
33
34use serde::{Deserialize, Serialize};
35
36use std::{
37  cmp::Ordering::{self, Equal, Greater, Less},
38  fmt::{Debug, Display},
39  io::{Cursor, ErrorKind, Read, Write},
40  marker::PhantomData,
41  str::FromStr,
42};
43
44pub mod bstree;
45pub mod cliargs;
46pub mod float;
47pub mod mk;
48pub mod rw;
49pub mod visitors;
50
51use float::FiniteFloat;
52use rw::*;
53
54pub trait FromU64: Sized {
55  fn from_u64(s: u64) -> Self;
56  fn to_u64(&self) -> u64;
57}
58
59impl FromU64 for u32 {
60  fn from_u64(s: u64) -> Self {
61    s as u32
62  }
63  fn to_u64(&self) -> u64 {
64    *self as u64
65  }
66}
67
68impl FromU64 for u64 {
69  fn from_u64(s: u64) -> Self {
70    s
71  }
72  fn to_u64(&self) -> u64 {
73    *self
74  }
75}
76
77impl FromU64 for String {
78  fn from_u64(s: u64) -> Self {
79    format!("{}", &s)
80  }
81  fn to_u64(&self) -> u64 {
82    panic!("Can't convert string into u64")
83  }
84}
85
86/// Trait defining the minimum requirements to be an identifier
87/// * `FromU64` is used to be able to generate the identifier from a line number
88pub trait Id: FromStr + FromU64 + Display + Debug + Clone + Send {}
89impl<T> Id for T where T: FromStr + FromU64 + Display + Debug + Clone + Send {}
90
91/// Trait defining the minimum requirements to be a value
92pub trait Val: FromStr + Clone + Ord + Display + Debug + Clone + Send {}
93impl<T> Val for T where T: FromStr + Clone + Ord + Display + Debug + Clone + Send {}
94
95#[derive(Debug)]
96pub enum IdInMemType {
97  U32,
98  U64,
99  Str { n_chars: usize },
100}
101
102#[derive(Clone, Debug, Serialize, Deserialize)]
103pub enum IdType {
104  U24, //(U24RW),
105  U32,
106  U40,
107  U48,
108  U56,
109  U64,
110  Str { n_chars: usize },
111  Custom, // To be written into the file, but need a specific code
112}
113
114impl IdType {
115  pub fn is_recno_compatible(&self) -> bool {
116    matches!(
117      self,
118      IdType::U24 | IdType::U32 | IdType::U40 | IdType::U48 | IdType::U56 | IdType::U64
119    )
120  }
121
122  pub fn byte_size(&self) -> usize {
123    match self {
124      IdType::U24 => 3,
125      IdType::U32 => 4,
126      IdType::U40 => 5,
127      IdType::U48 => 6,
128      IdType::U56 => 7,
129      IdType::U64 => 8,
130      IdType::Str { n_chars } => *n_chars,
131      IdType::Custom => panic!("Can't be used with Id type Custom"),
132    }
133  }
134
135  pub fn in_mem_type(&self) -> IdInMemType {
136    match self {
137      IdType::U24 | IdType::U32 => IdInMemType::U32,
138      IdType::U40 | IdType::U48 | IdType::U56 | IdType::U64 => IdInMemType::U64,
139      IdType::Str { n_chars } => IdInMemType::Str { n_chars: *n_chars },
140      IdType::Custom => panic!("Can't be used with Id type Custom"),
141    }
142  }
143}
144
145impl FromStr for IdType {
146  type Err = String;
147
148  /// Get an identifier type from a String
149  fn from_str(id_type: &str) -> Result<Self, Self::Err> {
150    let c: char = id_type[0..1].parse().unwrap();
151    let n_bytes: usize = id_type[1..].parse().unwrap();
152    match (c, n_bytes) {
153      ('u', 3) => Ok(IdType::U24),
154      ('u', 4) => Ok(IdType::U32),
155      ('u', 5) => Ok(IdType::U40),
156      ('u', 6) => Ok(IdType::U48),
157      ('u', 7) => Ok(IdType::U56),
158      ('u', 8) => Ok(IdType::U64),
159      ('t', nb) => Ok(IdType::Str { n_chars: nb }),
160      _ => Err(format!(
161        "Could not parse id type: '{}'. Must match 'u[3-8]' or 't[0-9]+'.",
162        &id_type
163      )),
164    }
165  }
166}
167
168#[derive(Debug)]
169pub enum ValInMemType {
170  U32,
171  U64,
172  I32,
173  I64,
174  F32,
175  F64,
176  Str { n_chars: usize },
177}
178
179#[derive(Clone, Debug, Serialize, Deserialize)]
180pub enum ValType {
181  U24,
182  U32,
183  U40,
184  U48,
185  U56,
186  U64,
187  I24,
188  I32,
189  I40,
190  I48,
191  I56,
192  I64,
193  F32,
194  F64,
195  Str { n_chars: usize },
196  Custom, // Handled externally
197}
198
199impl ValType {
200  pub fn byte_size(&self) -> usize {
201    match self {
202      ValType::U24 | ValType::I24 => 3,
203      ValType::U32 | ValType::I32 | ValType::F32 => 4,
204      ValType::U40 | ValType::I40 => 5,
205      ValType::U48 | ValType::I48 => 6,
206      ValType::U56 | ValType::I56 => 7,
207      ValType::U64 | ValType::I64 | ValType::F64 => 8,
208      ValType::Str { n_chars } => *n_chars,
209      ValType::Custom => panic!("Can't be used with Id type Custom"),
210    }
211  }
212
213  pub fn in_mem_type(&self) -> ValInMemType {
214    match self {
215      ValType::U24 | ValType::U32 => ValInMemType::U32,
216      ValType::U40 | ValType::U48 | ValType::U56 | ValType::U64 => ValInMemType::U64,
217      ValType::I24 | ValType::I32 => ValInMemType::I32,
218      ValType::I40 | ValType::I48 | ValType::I56 | ValType::I64 => ValInMemType::I64,
219      ValType::F32 => ValInMemType::F32,
220      ValType::F64 => ValInMemType::F64,
221      ValType::Str { n_chars } => ValInMemType::Str { n_chars: *n_chars },
222      ValType::Custom => panic!("Can't be used with Id type Custom"),
223    }
224  }
225}
226
227impl FromStr for ValType {
228  type Err = String;
229
230  /// Get a value type from a String
231  fn from_str(val_type: &str) -> Result<Self, Self::Err> {
232    let err = || {
233      format!(
234        "Could not parse id type: '{}'. Must match 'u[3-8]', 'i[3-8]', 'f[48]' or 't[0-9]+'.",
235        &val_type
236      )
237    };
238    let c: char = val_type[0..1].parse().map_err(|_| err())?;
239    let n_bytes: usize = val_type[1..].parse().map_err(|_| err())?;
240    match (c, n_bytes) {
241      ('u', 3) => Ok(ValType::U24),
242      ('u', 4) => Ok(ValType::U32),
243      ('u', 5) => Ok(ValType::U40),
244      ('u', 6) => Ok(ValType::U48),
245      ('u', 7) => Ok(ValType::U56),
246      ('u', 8) => Ok(ValType::U64),
247      ('i', 3) => Ok(ValType::I24),
248      ('i', 4) => Ok(ValType::I32),
249      ('i', 5) => Ok(ValType::I40),
250      ('i', 6) => Ok(ValType::I48),
251      ('i', 7) => Ok(ValType::I56),
252      ('i', 8) => Ok(ValType::I64),
253      ('f', 4) => Ok(ValType::F32),
254      ('f', 8) => Ok(ValType::F64),
255      ('t', nb) => Ok(ValType::Str { n_chars: nb }),
256      _ => Err(err()),
257    }
258  }
259}
260
261/// Defines an action which has to read and/or write given identifier and value types.
262/// It is made to be used with the `IdVal` type.
263/// The reason behind is that `IdVal` will contains the giant `match` for all possible
264/// (IdType, ValType) tuples.And we want to write this `match` only once!
265pub trait Process {
266  type Output;
267
268  fn exec<I, V, D, IRW, VRW>(
269    self,
270    types: IdVal,
271    id_rw: IRW,
272    val_rw: VRW,
273    dist: D,
274  ) -> Result<Self::Output, std::io::Error>
275  where
276    I: 'static + Id,
277    V: 'static + Val,
278    D: 'static + Fn(&V, &V) -> V + Send,
279    IRW: 'static + ReadWrite<Type = I>,
280    VRW: 'static + ReadWrite<Type = V>;
281}
282
283#[derive(Clone, Debug, Serialize, Deserialize)]
284pub struct IdVal(IdType, ValType);
285
286impl IdVal {
287  pub fn id_type(&self) -> &IdType {
288    &self.0
289  }
290
291  pub fn val_type(&self) -> &ValType {
292    &self.1
293  }
294
295  pub fn exec<P>(&self, p: P) -> Result<P::Output, std::io::Error>
296  // P::Output
297  where
298    P: Process,
299  {
300    // let ds = |_a: &String, _b: String| panic!("No string distance!");
301    // Here we use static dispatch with monomorphization
302    // - pro: one version of the code per possible tuple => very good performances!!
303    // - con: one version of the code per possible tuple => slow compilation + compiled code may be large!!
304
305    /*
306    #!/bin/bash
307    id="u24 u32 u40 u48 u56 u64 str"
308    val="u24 u32 u40 u48 u56 u64 i24 i32 i40 i48 i56 i64 f32 f64 str"
309    for i in ${id}; do
310      for v in ${val}; do
311        echo "#[cfg(feature = \"${i}_${v}\")]"
312      done
313    done
314    for i in ${id}; do
315      for v in ${val}; do
316        echo "${i}_${v} = []"
317      done
318    done
319    */
320
321    match (&self.0, &self.1) {
322      // IdType U24, ValType: All
323      #[cfg(feature = "u24_u24")]
324      (IdType::U24, ValType::U24) => p.exec(self.clone(), U24RW, U24RW, |a: &u32, b: &u32| {
325        if *a > *b {
326          *a - *b
327        } else {
328          *b - *a
329        }
330      }),
331      #[cfg(feature = "u24_u32")]
332      (IdType::U24, ValType::U32) => p.exec(self.clone(), U24RW, U32RW, |a: &u32, b: &u32| {
333        if *a > *b {
334          *a - *b
335        } else {
336          *b - *a
337        }
338      }),
339      #[cfg(feature = "u24_u40")]
340      (IdType::U24, ValType::U40) => p.exec(self.clone(), U24RW, U40RW, |a: &u64, b: &u64| {
341        if *a > *b {
342          *a - *b
343        } else {
344          *b - *a
345        }
346      }),
347      #[cfg(feature = "u24_u48")]
348      (IdType::U24, ValType::U48) => p.exec(self.clone(), U24RW, U48RW, |a: &u64, b: &u64| {
349        if *a > *b {
350          *a - *b
351        } else {
352          *b - *a
353        }
354      }),
355      #[cfg(feature = "u24_u56")]
356      (IdType::U24, ValType::U56) => p.exec(self.clone(), U24RW, U56RW, |a: &u64, b: &u64| {
357        if *a > *b {
358          *a - *b
359        } else {
360          *b - *a
361        }
362      }),
363      #[cfg(feature = "u24_u64")]
364      (IdType::U24, ValType::U64) => p.exec(self.clone(), U24RW, U64RW, |a: &u64, b: &u64| {
365        if *a > *b {
366          *a - *b
367        } else {
368          *b - *a
369        }
370      }),
371
372      #[cfg(feature = "u24_i24")]
373      (IdType::U24, ValType::I24) => {
374        p.exec(self.clone(), U24RW, I24RW, |a: &i32, b: &i32| (a - b).abs())
375      }
376      #[cfg(feature = "u24_i32")]
377      (IdType::U24, ValType::I32) => {
378        p.exec(self.clone(), U24RW, I32RW, |a: &i32, b: &i32| (a - b).abs())
379      }
380      #[cfg(feature = "u24_i40")]
381      (IdType::U24, ValType::I40) => {
382        p.exec(self.clone(), U24RW, I40RW, |a: &i64, b: &i64| (a - b).abs())
383      }
384      #[cfg(feature = "u24_i48")]
385      (IdType::U24, ValType::I48) => {
386        p.exec(self.clone(), U24RW, I48RW, |a: &i64, b: &i64| (a - b).abs())
387      }
388      #[cfg(feature = "u24_i56")]
389      (IdType::U24, ValType::I56) => {
390        p.exec(self.clone(), U24RW, I56RW, |a: &i64, b: &i64| (a - b).abs())
391      }
392      #[cfg(feature = "u24_i64")]
393      (IdType::U24, ValType::I64) => {
394        p.exec(self.clone(), U24RW, I64RW, |a: &i64, b: &i64| (a - b).abs())
395      }
396
397      #[cfg(feature = "u24_f32")]
398      (IdType::U24, ValType::F32) => p.exec(
399        self.clone(),
400        U24RW,
401        F32RW,
402        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
403          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
404        },
405      ),
406      #[cfg(feature = "u24_f64")]
407      (IdType::U24, ValType::F64) => p.exec(
408        self.clone(),
409        U24RW,
410        F64RW,
411        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
412          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
413        },
414      ),
415
416      #[cfg(feature = "u24_str")]
417      (IdType::U24, ValType::Str { n_chars }) => p.exec(
418        self.clone(),
419        U24RW,
420        StrRW { n_bytes: *n_chars },
421        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
422      ),
423
424      // IdType U32, ValType: All
425      #[cfg(feature = "u32_u24")]
426      (IdType::U32, ValType::U24) => p.exec(self.clone(), U32RW, U24RW, |a: &u32, b: &u32| {
427        if *a > *b {
428          *a - *b
429        } else {
430          *b - *a
431        }
432      }),
433      #[cfg(feature = "u32_u32")]
434      (IdType::U32, ValType::U32) => p.exec(self.clone(), U32RW, U32RW, |a: &u32, b: &u32| {
435        if *a > *b {
436          *a - *b
437        } else {
438          *b - *a
439        }
440      }),
441      #[cfg(feature = "u32_u40")]
442      (IdType::U32, ValType::U40) => p.exec(self.clone(), U32RW, U40RW, |a: &u64, b: &u64| {
443        if *a > *b {
444          *a - *b
445        } else {
446          *b - *a
447        }
448      }),
449      #[cfg(feature = "u32_u48")]
450      (IdType::U32, ValType::U48) => p.exec(self.clone(), U32RW, U48RW, |a: &u64, b: &u64| {
451        if *a > *b {
452          *a - *b
453        } else {
454          *b - *a
455        }
456      }),
457      #[cfg(feature = "u32_u56")]
458      (IdType::U32, ValType::U56) => p.exec(self.clone(), U32RW, U56RW, |a: &u64, b: &u64| {
459        if *a > *b {
460          *a - *b
461        } else {
462          *b - *a
463        }
464      }),
465      #[cfg(feature = "u32_u64")]
466      (IdType::U32, ValType::U64) => p.exec(self.clone(), U32RW, U64RW, |a: &u64, b: &u64| {
467        if *a > *b {
468          *a - *b
469        } else {
470          *b - *a
471        }
472      }),
473
474      #[cfg(feature = "u32_i24")]
475      (IdType::U32, ValType::I24) => {
476        p.exec(self.clone(), U32RW, I24RW, |a: &i32, b: &i32| (a - b).abs())
477      }
478      #[cfg(feature = "u32_i32")]
479      (IdType::U32, ValType::I32) => {
480        p.exec(self.clone(), U32RW, I32RW, |a: &i32, b: &i32| (a - b).abs())
481      }
482      #[cfg(feature = "u32_i40")]
483      (IdType::U32, ValType::I40) => {
484        p.exec(self.clone(), U32RW, I40RW, |a: &i64, b: &i64| (a - b).abs())
485      }
486      #[cfg(feature = "u32_i48")]
487      (IdType::U32, ValType::I48) => {
488        p.exec(self.clone(), U32RW, I48RW, |a: &i64, b: &i64| (a - b).abs())
489      }
490      #[cfg(feature = "u32_i56")]
491      (IdType::U32, ValType::I56) => {
492        p.exec(self.clone(), U32RW, I56RW, |a: &i64, b: &i64| (a - b).abs())
493      }
494      #[cfg(feature = "u32_i64")]
495      (IdType::U32, ValType::I64) => {
496        p.exec(self.clone(), U32RW, I64RW, |a: &i64, b: &i64| (a - b).abs())
497      }
498
499      #[cfg(feature = "u32_f32")]
500      (IdType::U32, ValType::F32) => p.exec(
501        self.clone(),
502        U32RW,
503        F32RW,
504        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
505          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
506        },
507      ),
508      #[cfg(feature = "u32_f64")]
509      (IdType::U32, ValType::F64) => p.exec(
510        self.clone(),
511        U32RW,
512        F64RW,
513        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
514          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
515        },
516      ),
517
518      #[cfg(feature = "u32_str")]
519      (IdType::U32, ValType::Str { n_chars }) => p.exec(
520        self.clone(),
521        U32RW,
522        StrRW { n_bytes: *n_chars },
523        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
524      ),
525
526      // IdType U40, ValType: All
527      #[cfg(feature = "u40_u24")]
528      (IdType::U40, ValType::U24) => p.exec(self.clone(), U40RW, U24RW, |a: &u32, b: &u32| {
529        if *a > *b {
530          *a - *b
531        } else {
532          *b - *a
533        }
534      }),
535      #[cfg(feature = "u40_u32")]
536      (IdType::U40, ValType::U32) => p.exec(self.clone(), U40RW, U32RW, |a: &u32, b: &u32| {
537        if *a > *b {
538          *a - *b
539        } else {
540          *b - *a
541        }
542      }),
543      #[cfg(feature = "u40_u40")]
544      (IdType::U40, ValType::U40) => p.exec(self.clone(), U40RW, U40RW, |a: &u64, b: &u64| {
545        if *a > *b {
546          *a - *b
547        } else {
548          *b - *a
549        }
550      }),
551      #[cfg(feature = "u40_u48")]
552      (IdType::U40, ValType::U48) => p.exec(self.clone(), U40RW, U48RW, |a: &u64, b: &u64| {
553        if *a > *b {
554          *a - *b
555        } else {
556          *b - *a
557        }
558      }),
559      #[cfg(feature = "u40_u56")]
560      (IdType::U40, ValType::U56) => p.exec(self.clone(), U40RW, U56RW, |a: &u64, b: &u64| {
561        if *a > *b {
562          *a - *b
563        } else {
564          *b - *a
565        }
566      }),
567      #[cfg(feature = "u40_u64")]
568      (IdType::U40, ValType::U64) => p.exec(self.clone(), U40RW, U64RW, |a: &u64, b: &u64| {
569        if *a > *b {
570          *a - *b
571        } else {
572          *b - *a
573        }
574      }),
575
576      #[cfg(feature = "u40_i24")]
577      (IdType::U40, ValType::I24) => {
578        p.exec(self.clone(), U40RW, I24RW, |a: &i32, b: &i32| (a - b).abs())
579      }
580      #[cfg(feature = "u40_i32")]
581      (IdType::U40, ValType::I32) => {
582        p.exec(self.clone(), U40RW, I32RW, |a: &i32, b: &i32| (a - b).abs())
583      }
584      #[cfg(feature = "u40_i40")]
585      (IdType::U40, ValType::I40) => {
586        p.exec(self.clone(), U40RW, I40RW, |a: &i64, b: &i64| (a - b).abs())
587      }
588      #[cfg(feature = "u40_i48")]
589      (IdType::U40, ValType::I48) => {
590        p.exec(self.clone(), U40RW, I48RW, |a: &i64, b: &i64| (a - b).abs())
591      }
592      #[cfg(feature = "u40_i56")]
593      (IdType::U40, ValType::I56) => {
594        p.exec(self.clone(), U40RW, I56RW, |a: &i64, b: &i64| (a - b).abs())
595      }
596      #[cfg(feature = "u40_i64")]
597      (IdType::U40, ValType::I64) => {
598        p.exec(self.clone(), U40RW, I64RW, |a: &i64, b: &i64| (a - b).abs())
599      }
600
601      #[cfg(feature = "u40_f32")]
602      (IdType::U40, ValType::F32) => p.exec(
603        self.clone(),
604        U40RW,
605        F32RW,
606        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
607          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
608        },
609      ),
610      #[cfg(feature = "u40_f64")]
611      (IdType::U40, ValType::F64) => p.exec(
612        self.clone(),
613        U40RW,
614        F64RW,
615        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
616          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
617        },
618      ),
619
620      #[cfg(feature = "u40_str")]
621      (IdType::U40, ValType::Str { n_chars }) => p.exec(
622        self.clone(),
623        U40RW,
624        StrRW { n_bytes: *n_chars },
625        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
626      ),
627
628      // IdType U48, ValType: All
629      #[cfg(feature = "u48_u24")]
630      (IdType::U48, ValType::U24) => p.exec(self.clone(), U48RW, U24RW, |a: &u32, b: &u32| {
631        if *a > *b {
632          *a - *b
633        } else {
634          *b - *a
635        }
636      }),
637      #[cfg(feature = "u48_u32")]
638      (IdType::U48, ValType::U32) => p.exec(self.clone(), U48RW, U32RW, |a: &u32, b: &u32| {
639        if *a > *b {
640          *a - *b
641        } else {
642          *b - *a
643        }
644      }),
645      #[cfg(feature = "u48_u40")]
646      (IdType::U48, ValType::U40) => p.exec(self.clone(), U48RW, U40RW, |a: &u64, b: &u64| {
647        if *a > *b {
648          *a - *b
649        } else {
650          *b - *a
651        }
652      }),
653      #[cfg(feature = "u48_u48")]
654      (IdType::U48, ValType::U48) => p.exec(self.clone(), U48RW, U48RW, |a: &u64, b: &u64| {
655        if *a > *b {
656          *a - *b
657        } else {
658          *b - *a
659        }
660      }),
661      #[cfg(feature = "u48_u56")]
662      (IdType::U48, ValType::U56) => p.exec(self.clone(), U48RW, U56RW, |a: &u64, b: &u64| {
663        if *a > *b {
664          *a - *b
665        } else {
666          *b - *a
667        }
668      }),
669      #[cfg(feature = "u48_u64")]
670      (IdType::U48, ValType::U64) => p.exec(self.clone(), U48RW, U64RW, |a: &u64, b: &u64| {
671        if *a > *b {
672          *a - *b
673        } else {
674          *b - *a
675        }
676      }),
677
678      #[cfg(feature = "u48_i24")]
679      (IdType::U48, ValType::I24) => {
680        p.exec(self.clone(), U48RW, I24RW, |a: &i32, b: &i32| (a - b).abs())
681      }
682      #[cfg(feature = "u48_i32")]
683      (IdType::U48, ValType::I32) => {
684        p.exec(self.clone(), U48RW, I32RW, |a: &i32, b: &i32| (a - b).abs())
685      }
686      #[cfg(feature = "u48_i40")]
687      (IdType::U48, ValType::I40) => {
688        p.exec(self.clone(), U48RW, I40RW, |a: &i64, b: &i64| (a - b).abs())
689      }
690      #[cfg(feature = "u48_i48")]
691      (IdType::U48, ValType::I48) => {
692        p.exec(self.clone(), U48RW, I48RW, |a: &i64, b: &i64| (a - b).abs())
693      }
694      #[cfg(feature = "u48_i56")]
695      (IdType::U48, ValType::I56) => {
696        p.exec(self.clone(), U48RW, I56RW, |a: &i64, b: &i64| (a - b).abs())
697      }
698      #[cfg(feature = "u48_i64")]
699      (IdType::U48, ValType::I64) => {
700        p.exec(self.clone(), U48RW, I64RW, |a: &i64, b: &i64| (a - b).abs())
701      }
702
703      #[cfg(feature = "u48_f32")]
704      (IdType::U48, ValType::F32) => p.exec(
705        self.clone(),
706        U48RW,
707        F32RW,
708        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
709          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
710        },
711      ),
712      #[cfg(feature = "u48_f64")]
713      (IdType::U48, ValType::F64) => p.exec(
714        self.clone(),
715        U48RW,
716        F64RW,
717        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
718          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
719        },
720      ),
721
722      #[cfg(feature = "u48_str")]
723      (IdType::U48, ValType::Str { n_chars }) => p.exec(
724        self.clone(),
725        U48RW,
726        StrRW { n_bytes: *n_chars },
727        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
728      ),
729
730      // IdType U56, ValType: All
731      #[cfg(feature = "u56_u24")]
732      (IdType::U56, ValType::U24) => p.exec(self.clone(), U56RW, U24RW, |a: &u32, b: &u32| {
733        if *a > *b {
734          *a - *b
735        } else {
736          *b - *a
737        }
738      }),
739      #[cfg(feature = "u56_u32")]
740      (IdType::U56, ValType::U32) => p.exec(self.clone(), U56RW, U32RW, |a: &u32, b: &u32| {
741        if *a > *b {
742          *a - *b
743        } else {
744          *b - *a
745        }
746      }),
747      #[cfg(feature = "u56_u40")]
748      (IdType::U56, ValType::U40) => p.exec(self.clone(), U56RW, U40RW, |a: &u64, b: &u64| {
749        if *a > *b {
750          *a - *b
751        } else {
752          *b - *a
753        }
754      }),
755      #[cfg(feature = "u56_u48")]
756      (IdType::U56, ValType::U48) => p.exec(self.clone(), U56RW, U48RW, |a: &u64, b: &u64| {
757        if *a > *b {
758          *a - *b
759        } else {
760          *b - *a
761        }
762      }),
763      #[cfg(feature = "u56_u64")]
764      (IdType::U56, ValType::U56) => p.exec(self.clone(), U56RW, U56RW, |a: &u64, b: &u64| {
765        if *a > *b {
766          *a - *b
767        } else {
768          *b - *a
769        }
770      }),
771      #[cfg(feature = "u56_u64")]
772      (IdType::U56, ValType::U64) => p.exec(self.clone(), U56RW, U64RW, |a: &u64, b: &u64| {
773        if *a > *b {
774          *a - *b
775        } else {
776          *b - *a
777        }
778      }),
779
780      #[cfg(feature = "u56_i24")]
781      (IdType::U56, ValType::I24) => {
782        p.exec(self.clone(), U56RW, I24RW, |a: &i32, b: &i32| (a - b).abs())
783      }
784      #[cfg(feature = "u56_i32")]
785      (IdType::U56, ValType::I32) => {
786        p.exec(self.clone(), U56RW, I32RW, |a: &i32, b: &i32| (a - b).abs())
787      }
788      #[cfg(feature = "u56_i40")]
789      (IdType::U56, ValType::I40) => {
790        p.exec(self.clone(), U56RW, I40RW, |a: &i64, b: &i64| (a - b).abs())
791      }
792      #[cfg(feature = "u56_i48")]
793      (IdType::U56, ValType::I48) => {
794        p.exec(self.clone(), U56RW, I48RW, |a: &i64, b: &i64| (a - b).abs())
795      }
796      #[cfg(feature = "u56_i56")]
797      (IdType::U56, ValType::I56) => {
798        p.exec(self.clone(), U56RW, I56RW, |a: &i64, b: &i64| (a - b).abs())
799      }
800      #[cfg(feature = "u56_i64")]
801      (IdType::U56, ValType::I64) => {
802        p.exec(self.clone(), U56RW, I64RW, |a: &i64, b: &i64| (a - b).abs())
803      }
804
805      #[cfg(feature = "u56_f32")]
806      (IdType::U56, ValType::F32) => p.exec(
807        self.clone(),
808        U56RW,
809        F32RW,
810        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
811          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
812        },
813      ),
814      #[cfg(feature = "u56_f64")]
815      (IdType::U56, ValType::F64) => p.exec(
816        self.clone(),
817        U56RW,
818        F64RW,
819        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
820          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
821        },
822      ),
823
824      #[cfg(feature = "u56_str")]
825      (IdType::U56, ValType::Str { n_chars }) => p.exec(
826        self.clone(),
827        U56RW,
828        StrRW { n_bytes: *n_chars },
829        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
830      ),
831
832      // IdType U64, ValType: All
833      #[cfg(feature = "u64_u24")]
834      (IdType::U64, ValType::U24) => p.exec(self.clone(), U64RW, U24RW, |a: &u32, b: &u32| {
835        if *a > *b {
836          *a - *b
837        } else {
838          *b - *a
839        }
840      }),
841      #[cfg(feature = "u64_u32")]
842      (IdType::U64, ValType::U32) => p.exec(self.clone(), U64RW, U32RW, |a: &u32, b: &u32| {
843        if *a > *b {
844          *a - *b
845        } else {
846          *b - *a
847        }
848      }),
849      #[cfg(feature = "u64_u40")]
850      (IdType::U64, ValType::U40) => p.exec(self.clone(), U64RW, U40RW, |a: &u64, b: &u64| {
851        if *a > *b {
852          *a - *b
853        } else {
854          *b - *a
855        }
856      }),
857      #[cfg(feature = "u64_u48")]
858      (IdType::U64, ValType::U48) => p.exec(self.clone(), U64RW, U48RW, |a: &u64, b: &u64| {
859        if *a > *b {
860          *a - *b
861        } else {
862          *b - *a
863        }
864      }),
865      #[cfg(feature = "u64_u56")]
866      (IdType::U64, ValType::U56) => p.exec(self.clone(), U64RW, U56RW, |a: &u64, b: &u64| {
867        if *a > *b {
868          *a - *b
869        } else {
870          *b - *a
871        }
872      }),
873      #[cfg(feature = "u64_u64")]
874      (IdType::U64, ValType::U64) => p.exec(self.clone(), U64RW, U64RW, |a: &u64, b: &u64| {
875        if *a > *b {
876          *a - *b
877        } else {
878          *b - *a
879        }
880      }),
881
882      #[cfg(feature = "u64_i24")]
883      (IdType::U64, ValType::I24) => {
884        p.exec(self.clone(), U64RW, I24RW, |a: &i32, b: &i32| (a - b).abs())
885      }
886      #[cfg(feature = "u64_i32")]
887      (IdType::U64, ValType::I32) => {
888        p.exec(self.clone(), U64RW, I32RW, |a: &i32, b: &i32| (a - b).abs())
889      }
890      #[cfg(feature = "u64_i40")]
891      (IdType::U64, ValType::I40) => {
892        p.exec(self.clone(), U64RW, I40RW, |a: &i64, b: &i64| (a - b).abs())
893      }
894      #[cfg(feature = "u64_i48")]
895      (IdType::U64, ValType::I48) => {
896        p.exec(self.clone(), U64RW, I48RW, |a: &i64, b: &i64| (a - b).abs())
897      }
898      #[cfg(feature = "u64_i56")]
899      (IdType::U64, ValType::I56) => {
900        p.exec(self.clone(), U64RW, I56RW, |a: &i64, b: &i64| (a - b).abs())
901      }
902      #[cfg(feature = "u64_i64")]
903      (IdType::U64, ValType::I64) => {
904        p.exec(self.clone(), U64RW, I64RW, |a: &i64, b: &i64| (a - b).abs())
905      }
906
907      #[cfg(feature = "u64_f32")]
908      (IdType::U64, ValType::F32) => p.exec(
909        self.clone(),
910        U64RW,
911        F32RW,
912        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
913          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
914        },
915      ),
916      #[cfg(feature = "u64_f64")]
917      (IdType::U64, ValType::F64) => p.exec(
918        self.clone(),
919        U64RW,
920        F64RW,
921        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
922          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
923        },
924      ),
925
926      #[cfg(feature = "u64_str")]
927      (IdType::U64, ValType::Str { n_chars }) => p.exec(
928        self.clone(),
929        U64RW,
930        StrRW { n_bytes: *n_chars },
931        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
932      ),
933
934      // IdType Str, ValType: All
935      #[cfg(feature = "str_u24")]
936      (IdType::Str { n_chars }, ValType::U24) => p.exec(
937        self.clone(),
938        StrRW { n_bytes: *n_chars },
939        U24RW,
940        |a: &u32, b: &u32| if *a > *b { *a - *b } else { *b - *a },
941      ),
942      #[cfg(feature = "str_u32")]
943      (IdType::Str { n_chars }, ValType::U32) => p.exec(
944        self.clone(),
945        StrRW { n_bytes: *n_chars },
946        U32RW,
947        |a: &u32, b: &u32| if *a > *b { *a - *b } else { *b - *a },
948      ),
949      #[cfg(feature = "str_u40")]
950      (IdType::Str { n_chars }, ValType::U40) => p.exec(
951        self.clone(),
952        StrRW { n_bytes: *n_chars },
953        U40RW,
954        |a: &u64, b: &u64| if *a > *b { *a - *b } else { *b - *a },
955      ),
956      #[cfg(feature = "str_u48")]
957      (IdType::Str { n_chars }, ValType::U48) => p.exec(
958        self.clone(),
959        StrRW { n_bytes: *n_chars },
960        U48RW,
961        |a: &u64, b: &u64| if *a > *b { *a - *b } else { *b - *a },
962      ),
963      #[cfg(feature = "str_u56")]
964      (IdType::Str { n_chars }, ValType::U56) => p.exec(
965        self.clone(),
966        StrRW { n_bytes: *n_chars },
967        U56RW,
968        |a: &u64, b: &u64| if *a > *b { *a - *b } else { *b - *a },
969      ),
970      #[cfg(feature = "str_u64")]
971      (IdType::Str { n_chars }, ValType::U64) => p.exec(
972        self.clone(),
973        StrRW { n_bytes: *n_chars },
974        U64RW,
975        |a: &u64, b: &u64| if *a > *b { *a - *b } else { *b - *a },
976      ),
977
978      #[cfg(feature = "str_i24")]
979      (IdType::Str { n_chars }, ValType::I24) => p.exec(
980        self.clone(),
981        StrRW { n_bytes: *n_chars },
982        I24RW,
983        |a: &i32, b: &i32| (a - b).abs(),
984      ),
985      #[cfg(feature = "str_i32")]
986      (IdType::Str { n_chars }, ValType::I32) => p.exec(
987        self.clone(),
988        StrRW { n_bytes: *n_chars },
989        I32RW,
990        |a: &i32, b: &i32| (a - b).abs(),
991      ),
992      #[cfg(feature = "str_i40")]
993      (IdType::Str { n_chars }, ValType::I40) => p.exec(
994        self.clone(),
995        StrRW { n_bytes: *n_chars },
996        I40RW,
997        |a: &i64, b: &i64| (a - b).abs(),
998      ),
999      #[cfg(feature = "str_i48")]
1000      (IdType::Str { n_chars }, ValType::I48) => p.exec(
1001        self.clone(),
1002        StrRW { n_bytes: *n_chars },
1003        I48RW,
1004        |a: &i64, b: &i64| (a - b).abs(),
1005      ),
1006      #[cfg(feature = "str_i56")]
1007      (IdType::Str { n_chars }, ValType::I56) => p.exec(
1008        self.clone(),
1009        StrRW { n_bytes: *n_chars },
1010        I56RW,
1011        |a: &i64, b: &i64| (a - b).abs(),
1012      ),
1013      #[cfg(feature = "str_i64")]
1014      (IdType::Str { n_chars }, ValType::I64) => p.exec(
1015        self.clone(),
1016        StrRW { n_bytes: *n_chars },
1017        I64RW,
1018        |a: &i64, b: &i64| (a - b).abs(),
1019      ),
1020
1021      #[cfg(feature = "str_f32")]
1022      (IdType::Str { n_chars }, ValType::F32) => p.exec(
1023        self.clone(),
1024        StrRW { n_bytes: *n_chars },
1025        F32RW,
1026        |a: &FiniteFloat<f32>, b: &FiniteFloat<f32>| {
1027          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
1028        },
1029      ),
1030      #[cfg(feature = "str_f64")]
1031      (IdType::Str { n_chars }, ValType::F64) => p.exec(
1032        self.clone(),
1033        StrRW { n_bytes: *n_chars },
1034        F64RW,
1035        |a: &FiniteFloat<f64>, b: &FiniteFloat<f64>| {
1036          FiniteFloat::new((a.get() - b.get()).abs()).unwrap()
1037        },
1038      ),
1039
1040      #[cfg(feature = "str_str")]
1041      (IdType::Str { n_chars: n_chars_i }, ValType::Str { n_chars: n_chars_v }) => p.exec(
1042        self.clone(),
1043        StrRW {
1044          n_bytes: *n_chars_i,
1045        },
1046        StrRW {
1047          n_bytes: *n_chars_v,
1048        },
1049        |_a: &String, _b: &String| panic!("Distance not implemented for Strings"),
1050      ),
1051
1052      _ => Err(std::io::Error::new(
1053        ErrorKind::Other,
1054        "Case not supported! See crate features!!",
1055      )),
1056    }
1057  }
1058}
1059
1060#[derive(Debug, Clone)]
1061pub struct EntryOpt<I: Id, V: Val> {
1062  /// Row identifier
1063  pub id: I,
1064  /// Indexed value
1065  pub val: Option<V>,
1066}
1067
1068#[derive(Debug, Clone)]
1069pub struct Entry<I: Id, V: Val> {
1070  /// Row identifier
1071  pub id: I,
1072  /// Indexed value
1073  pub val: V,
1074}
1075
1076impl<I: Id, V: Val> Entry<I, V> {
1077  fn read<R, IRW, VRW>(
1078    reader: &mut R,
1079    id_codec: &IRW,
1080    val_codec: &VRW,
1081  ) -> Result<Self, std::io::Error>
1082  where
1083    R: Read,
1084    IRW: ReadWrite<Type = I>,
1085    VRW: ReadWrite<Type = V>,
1086  {
1087    id_codec
1088      .read(reader)
1089      .and_then(|id| val_codec.read(reader).map(|val| Entry { id, val }))
1090  }
1091
1092  fn write<W, IRW, VRW>(
1093    &self,
1094    writer: &mut W,
1095    id_codec: &IRW,
1096    val_codec: &VRW,
1097  ) -> Result<(), std::io::Error>
1098  where
1099    W: Write,
1100    IRW: ReadWrite<Type = I>,
1101    VRW: ReadWrite<Type = V>,
1102  {
1103    id_codec
1104      .write(writer, &self.id)
1105      .and_then(|()| val_codec.write(writer, &self.val))
1106  }
1107}
1108
1109impl<I: Id, V: Val> PartialOrd for Entry<I, V> {
1110  fn partial_cmp(&self, other: &Entry<I, V>) -> Option<Ordering> {
1111    Some(self.cmp(other))
1112  }
1113}
1114
1115impl<I: Id, V: Val> Ord for Entry<I, V> {
1116  fn cmp(&self, other: &Entry<I, V>) -> Ordering {
1117    self.val.cmp(&other.val)
1118  }
1119}
1120
1121impl<I: Id, V: Val> PartialEq for Entry<I, V> {
1122  fn eq(&self, other: &Entry<I, V>) -> bool {
1123    self.val == other.val
1124  }
1125}
1126
1127impl<I: Id, V: Val> Eq for Entry<I, V> where V: Ord {}
1128
1129pub struct EntryRaw<'a, I, V>
1130where
1131  V: PartialOrd,
1132{
1133  raw: &'a [u8],
1134  id: PhantomData<&'a I>,
1135  val: PhantomData<&'a V>,
1136}
1137
1138impl<'a, I, V> EntryRaw<'a, I, V>
1139where
1140  V: PartialOrd,
1141{
1142  pub fn get_id<IRW, VRW>(&self, id_codec: &IRW, _val_codec: &VRW) -> Result<I, std::io::Error>
1143  where
1144    IRW: ReadWrite<Type = I>,
1145    VRW: ReadWrite<Type = V>,
1146  {
1147    id_codec.read(&mut Cursor::new(self.raw))
1148  }
1149
1150  pub fn get_val<IRW, VRW>(&self, id_codec: &IRW, val_codec: &VRW) -> Result<V, std::io::Error>
1151  where
1152    IRW: ReadWrite<Type = I>,
1153    VRW: ReadWrite<Type = V>,
1154  {
1155    let mut cursor = Cursor::new(self.raw);
1156    cursor.set_position(id_codec.n_bytes() as u64);
1157    val_codec.read(&mut cursor)
1158  }
1159}
1160
1161pub struct RawEntries<'a, I, V, IRW, VRW>
1162where
1163  I: Id,
1164  V: Val,
1165  IRW: ReadWrite<Type = I>,
1166  VRW: ReadWrite<Type = V>,
1167{
1168  raw: Cursor<&'a [u8]>,
1169  id_rw: &'a IRW,
1170  val_rw: &'a VRW,
1171  entry_byte_size: usize,
1172  n_entries: usize,
1173}
1174
1175impl<'a, I, V, IRW, VRW> RawEntries<'a, I, V, IRW, VRW>
1176where
1177  I: Id,
1178  V: Val,
1179  IRW: ReadWrite<Type = I>,
1180  VRW: ReadWrite<Type = V>,
1181{
1182  pub fn new(raw: &'a [u8], id_rw: &'a IRW, val_rw: &'a VRW) -> RawEntries<'a, I, V, IRW, VRW> {
1183    assert!(!raw.is_empty());
1184    let entry_byte_size = id_rw.n_bytes() + val_rw.n_bytes();
1185    let n_entries = raw.len() / entry_byte_size;
1186    RawEntries {
1187      raw: Cursor::new(raw),
1188      id_rw,
1189      val_rw,
1190      entry_byte_size,
1191      n_entries,
1192    }
1193  }
1194
1195  pub fn n_entries(&self) -> usize {
1196    // self.raw.get_ref().len() / self.entry_byte_size
1197    self.n_entries
1198  }
1199
1200  // For better performances, have a look at raw pointers!!
1201  fn get_val(&mut self, index: usize) -> Result<V, std::io::Error> {
1202    let pos = (self.entry_byte_size * index + self.id_rw.n_bytes()) as u64;
1203    self.raw.set_position(pos);
1204    self.val_rw.read(&mut self.raw)
1205  }
1206
1207  // For better performances, have a look at raw pointers!!
1208  fn get_entry(&mut self, index: usize) -> Result<Entry<I, V>, std::io::Error> {
1209    self.raw.set_position((self.entry_byte_size * index) as u64);
1210    Entry::read(&mut self.raw, self.id_rw, self.val_rw)
1211  }
1212
1213  pub fn binary_search(&mut self, val: &V) -> Result<Result<usize, usize>, std::io::Error> {
1214    // Code taken from Rust slice binary_search:
1215    // https://doc.rust-lang.org/src/core/slice/mod.rs.html#1470-1474
1216    let mut size = self.n_entries();
1217    let mut base = 0_usize;
1218    while size > 1 {
1219      let half = size >> 1;
1220      let mid = base + half;
1221      // mid is always in [0, size), that means mid is >= 0 and < size.
1222      // mid >= 0: by definition
1223      // mid < size: mid = size / 2 + size / 4 + size / 8 ...
1224      let cur_val = self.get_val(mid)?;
1225      let cmp = cur_val.cmp(val);
1226      base = if cmp == Greater { base } else { mid };
1227      size -= half;
1228    }
1229    // base is always in [0, size) because base <= mid.
1230    self.get_val(base).map(|v| {
1231      let cmp = v.cmp(val);
1232      if cmp == Equal {
1233        Ok(base)
1234      } else {
1235        Err(base + (cmp == Less) as usize)
1236      }
1237    })
1238  }
1239}
1240
1241// datastruct:
1242// - meta
1243// - null values block (only identifiers, sequentially ordered by `id`)
1244// - values blocks key,val pairs (ordered by `val` blocks)