1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
//! Traits and types for building trie-based indices. //! //! The trie structure has each each element of each layer indicate a range of elements //! in the next layer. Similarly, ranges of elements in the layer itself may correspond //! to single elements in the layer above. pub mod ordered; pub mod ordered_leaf; // pub mod hashed; // pub mod weighted; // pub mod unordered; /// A collection of tuples, and types for building and enumerating them. /// /// There are some implicit assumptions about the elements in trie-structured data, mostly that /// the items have some `(key, val)` structure. Perhaps we will nail these down better in the /// future and get a better name for the trait. pub trait Trie : ::std::marker::Sized { /// The type of item from which the type is constructed. type Item; /// The type of cursor used to navigate the type. type Cursor: Cursor<Self>; /// The type used to merge instances of the type together. type MergeBuilder: MergeBuilder<Trie=Self>; /// The type used to assemble instances of the type from its `Item`s. type TupleBuilder: TupleBuilder<Trie=Self, Item=Self::Item>; /// The number of distinct keys, as distinct from the total number of tuples. fn keys(&self) -> usize; /// The total number of tuples in the collection. fn tuples(&self) -> usize; /// Returns a cursor capable of navigating the collection. fn cursor(&self) -> Self::Cursor { self.cursor_from(0, self.keys()) } /// Returns a cursor over a range of data, commonly used by others to restrict navigation to /// sub-collections. fn cursor_from(&self, lower: usize, upper: usize) -> Self::Cursor; /// Merges two collections into a third. /// /// Collections are allowed their own semantics for merging. For example, unordered collections /// simply collect values, whereas weighted collections accumulate weights and discard elements /// whose weights are zero. fn merge(&self, other: &Self) -> Self { let mut merger = Self::MergeBuilder::with_capacity(self, other); // println!("{:?} and {:?}", self.keys(), other.keys()); merger.push_merge((self, 0, self.keys()), (other, 0, other.keys())); merger.done() } } /// A type used to assemble collections. pub trait Builder { /// The type of collection produced. type Trie: Trie; /// Requests a commitment to the offset of the current-most sub-collection. /// /// This is most often used by parent collections to indicate that some set of values are now /// logically distinct from the next set of values, and that the builder should acknowledge this /// and report the limit (to store as an offset in the parent collection). fn boundary(&mut self) -> usize; /// Finalizes the building process and returns the collection. fn done(self) -> Self::Trie; } /// A type used to assemble collections by merging other instances. pub trait MergeBuilder : Builder { /// Allocates an instance of the builder with sufficient capacity to contain the merged data. fn with_capacity(other1: &Self::Trie, other2: &Self::Trie) -> Self; /// Copies sub-collections of `other` into this collection. fn copy_range(&mut self, other: &Self::Trie, lower: usize, upper: usize); /// Merges two sub-collections into one sub-collection. fn push_merge(&mut self, other1: (&Self::Trie, usize, usize), other2: (&Self::Trie, usize, usize)) -> usize; } /// A type used to assemble collections from ordered sequences of tuples. pub trait TupleBuilder : Builder { /// The type of item accepted for construction. type Item; /// Allocates a new builder. fn new() -> Self; /// Allocates a new builder with capacity for at least `cap` tuples. fn with_capacity(cap: usize) -> Self; // <-- unclear how to set child capacities... /// Inserts a new into the collection. fn push_tuple(&mut self, tuple: Self::Item); } /// A type supporting navigation. /// /// The precise meaning of this navigation is not defined by the trait. It is likely that having /// navigated around, the cursor will be different in some other way, but the `Cursor` trait does /// not explain how this is so. pub trait Cursor<Storage> { /// The type revealed by the cursor. type Key; /// Reveals the current key. fn key<'a>(&self, storage: &'a Storage) -> &'a Self::Key; /// Advances the cursor by one element. fn step(&mut self, storage: &Storage); /// Advances the cursor until the location where `key` would be expected. fn seek(&mut self, storage: &Storage, key: &Self::Key); /// Returns `true` if the cursor points at valid data. Returns `false` if the cursor is exhausted. fn valid(&self, storage: &Storage) -> bool; /// Rewinds the cursor to its initial state. fn rewind(&mut self, storage: &Storage); /// Repositions the cursor to a different range of values. fn reposition(&mut self, storage: &Storage, lower: usize, upper: usize); } /// Reports the number of elements satisfing the predicate. /// /// This methods *relies strongly* on the assumption that the predicate /// stays false once it becomes false, a joint property of the predicate /// and the slice. This allows `advance` to use exponential search to /// count the number of elements in time logarithmic in the result. pub fn advance<T, F: Fn(&T)->bool>(slice: &[T], function: F) -> usize { let small_limit = 8; // Exponential seach if the answer isn't within `small_limit`. if slice.len() > small_limit && function(&slice[small_limit]) { // start with no advance let mut index = small_limit + 1; if index < slice.len() && function(&slice[index]) { // advance in exponentially growing steps. let mut step = 1; while index + step < slice.len() && function(&slice[index + step]) { index += step; step = step << 1; } // advance in exponentially shrinking steps. step = step >> 1; while step > 0 { if index + step < slice.len() && function(&slice[index + step]) { index += step; } step = step >> 1; } index += 1; } index } else { let limit = std::cmp::min(slice.len(), small_limit); slice[..limit].iter().filter(|x| function(*x)).count() } }