1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
use ahtable::*;

pub mod dense;

const BURST_THRESHOLD: usize = 16384;

/// All possible type of hat-trie node.
#[derive(Clone, Debug)]
pub enum NodeType<K, T, V> 
where K: Copy + core::hash::Hash + PartialEq + PartialOrd + Sized,
      Box<[K]>: Clone + PartialEq,
      T: TrieNode<K, V>,
      V: Clone {
    /// Empty type. This is only used for temporary value replacement.
    /// For example, when splitting node.
    None,
    /// Sub layer of trie which is also an object of type trie.
    Trie(T),
    /// Pure container type. It can have only one parent trie node.
    Pure(ArrayHash<twox_hash::XxHash64, Box<[K]>, V>),
    /// Hybrid contaainer type. It can have more than one parent trie node.
    Hybrid((ArrayHash<twox_hash::XxHash64, Box<[K]>, V>, core::ops::RangeInclusive<K>))
}

/// Implement Default so that we can swap value out to perform node bursting/splitting.
impl<K, T, V> Default for NodeType<K, T, V> 
where K: Copy + core::hash::Hash + PartialEq + PartialOrd + Sized,
      Box<[K]>: Clone + PartialEq,
      T: TrieNode<K, V>,
      V: Clone {
        fn default() -> Self {
            NodeType::None
        }
}

/// A TrieNode operation where every implementation of Trie in this crate shall support.
pub trait TrieNode<K, V> 
where K: Copy + core::hash::Hash + PartialEq + PartialOrd + Sized,
      Box<[K]>: Clone + PartialEq,
      V: Clone,
      Self: Sized {
    /// Construct a new trie node from a bucket of pure node.
    /// This happen whenever burst or split threshold reach.
    /// It consume a given bucket and return a new Trie node which contains
    /// splitted bucket as childs.
    /// 
    /// The new trie node will have specified burst/split threshold.
    fn new_split(bucket: ArrayHash<twox_hash::XxHash64, Box<[K]>, V>, threshold: usize) -> Self;
    /// Get a child node of given key from current node.
    fn child(&'_ self, key: &K) -> &'_ NodeType<K, Self, V>;
    /// Retrieve a value of current node.
    fn value(&self) -> Option<&V>;
    /// Retrieve a mutable value of current node.
    fn value_mut(&mut self) -> &mut Option<V>;
    /// Put value into this trie node.
    /// 
    /// If key is already exist in this trie, it replace existing entry with give
    /// key/value and return existing value to caller.
    fn put(&mut self, key: &[K], value: V) -> Option<V>;
    /// Try putting value into this trie.
    /// 
    /// If key is already exist in this trie, it will return existing value to caller without
    /// any change to the trie.
    fn try_put<'a>(&'a mut self, key: &[K], value: V) -> Option<&'a V> where K: 'a;

    /// Utilities function that help shortcut key lookup when
    /// caller request key greater than max_key_len, it mean there's no such key
    /// in this trie.
    /// 
    /// By default, it return MAX value of usize.
    #[inline(always)]
    fn max_key_len(&self) -> usize {
        usize::MAX
    }
    /// Utilities function that help shortcut key lookup when
    /// caller request key less than min_key_len, it mean there's no such key
    /// in this trie.
    /// 
    /// By default, it return MIN value of usize.
    #[inline(always)]
    fn min_key_len(&self) -> usize {
        usize::MIN
    }
    
    /// Get a value from this trie associated with given key slice.
    fn get<'a>(&'a self, key: &[K]) -> Option<&'a V> where K: 'a {
        
        // Key that is not part of this trie
        match key.len() {
            0 => return None,
            x if x < self.min_key_len() => return None,
            x if x > self.max_key_len() => return None,
            _ => ()
        } 

        let mut offset = 0; // Progress of key being process
        // let mut nodes = &self.child(key); // Set root childs as nodes to be search for a key
        let mut parent = self;
        let mut node;

        loop {

            if offset < key.len() {
                // if there's some remain key to be process, we analyze child of current node
                // Dense Trie can use byte as index into each possible slot
                node = parent.child(&key[offset]);

                match node {
                    NodeType::None => {
                        return None
                    }
                    NodeType::Trie(t) => {
                        // For simple trie, we just add all childs back to node waiting to be process
                        parent = t;
                    },
                    NodeType::Pure(child) | NodeType::Hybrid((child, _)) => {
                        // For Pure/Hybrid type, it's `ArrayHash` that contains whole remaining key.
                        // We can simply leverage `get` method using remaining key.
                        return child.get(&key[offset..].into())
                    }
                }
            } else {
                return parent.value()
            }
            offset += 1; // Move offset so next time key being use will be those that is not yet processed.
        }
    }

    /// Find all possible prefix in given key that match with this trie node.
    /// 
    /// # Parameter
    /// `key` - a slice of key to find a prefix.
    /// 
    /// # Return
    /// It return [PrefixIterator](struct.PrefixIterator.html) which can be used to 
    /// access tuple of prefix slice and value of node in this trie.
    fn prefix<'a, 'b>(&'a self, key: &'b [K]) -> PrefixIterator<'a, 'b, K, Self, V> {
        PrefixIterator {
            _phantom: core::marker::PhantomData,
            bucket: None,
            cursor: 0,
            cur_len: self.min_key_len(), // We shall reduce comparison by start at shortest key in this trie.
            node: self,
            // We only need to check prefix up to longest key in this trie.
            query: &key[..core::cmp::min(self.max_key_len(), key.len())] 
        }
    }

    /// Find a longest prefix from given key from this Trie.
    /// 
    /// This is a utility function that perform exactly like you iterate on method 
    /// [prefix](struct.TrieNode.html#method.prefix) to find a longest prefix by yourself.
    fn longest_prefix<'a, 'b>(&'a self, key: &'b [K]) -> Option<(&'b[K], &'a V)> where K: 'a {
        let mut longest: Option<(&[K], &V)> = None;
        self.prefix(key).for_each(|(key, value)| {
            if let Some((k, _)) = longest {
                if key.len() > k.len() {
                    longest = Some((key, value));
                }
            } else {
                longest = Some((key, value));
            }
        });

        longest
    }
}

/// An iterator that return prefix of given query.
/// 
/// This iterator also return an exact match to query. It doesn't guarantee order of return prefix.
#[derive(Debug)]
pub struct PrefixIterator<'a, 'b, K, T, V> 
where K: Copy + core::hash::Hash + PartialEq + PartialOrd + Sized,
      Box<[K]>: Clone + PartialEq,
      T: 'a + TrieNode<K, V>,
      V: Clone {
    _phantom: core::marker::PhantomData<V>,
    bucket: Option<&'a ArrayHash<twox_hash::XxHash64, Box<[K]>, V>>,
    // bucket: Option<ahtable::ArrayHashIterator<'a, Box<[K]>, V>>,
    cursor: usize,
    cur_len: usize,
    node: &'a T,
    query: &'b [K]
}

impl<'a, 'b, K, T, V>  Iterator for PrefixIterator<'a, 'b, K, T, V> 
where K: Copy + core::hash::Hash + PartialEq + PartialOrd + Sized,
      Box<[K]>: Clone + PartialEq,
      T: 'a + TrieNode<K, V>,
      V: Clone {
    type Item=(&'b[K], &'a V);

    fn next(&mut self) -> Option<Self::Item> {
        if let Some(ref mut bucket) = self.bucket {
            // Resume iterating over query and use smart_get method to find if such key exist
            for i in (self.cursor + self.cur_len)..=self.query.len() {
                let cur_key = &self.query[self.cursor..i];
                if let Some(v) = bucket.smart_get(cur_key) {
                    self.cur_len = i - self.cursor + 1;
                    return Some((&self.query[..i], v))
                }
            }

            // below code use iterative style to fetch for prefix

            // while let Some((key, value)) = bucket.next() {
            //     let bound = self.cursor + key.len();
            //     if bound <= self.query.len() && &self.query[self.cursor..bound] == &**key {
            //         return Some((&self.query[..bound], value))
            //     }
            // }

            // It is impossible to find value in any other bucket because of the type of data structure
            return None
        } else if self.query.len() == 0 {
            return None
        } else {
            // Traverse the trie to find either a trie node with value or
            // a container node which will require some other way to find a prefix.
            while self.bucket.is_none() {
                match self.node.child(&self.query[self.cursor]) {
                    NodeType::None => {
                        return None
                    },
                    NodeType::Trie(ref t) => {
                        self.node = t;
                        self.cursor += 1;

                        if t.value().is_some() {
                            return Some((&self.query[..=self.cursor], t.value().as_ref().unwrap()))
                        }
                    },
                    NodeType::Pure(ref bucket) => {
                        self.bucket = Some(bucket);
                        // self.bucket = Some(bucket.iter());
                    },
                    NodeType::Hybrid((ref bucket, _)) => {
                        self.bucket = Some(bucket);
                        // self.bucket = Some(bucket.iter());
                    }
                }
            }

            // Mirror a code above to iterating on query element and use smart_get to find if key exist
            if let Some(ref mut bucket) = self.bucket {
                for i in (self.cursor + self.cur_len)..=self.query.len() {
                    let cur_key = &self.query[self.cursor..i];
                    if let Some(v) = bucket.smart_get(cur_key) {
                        self.cur_len = i - self.cursor + 1;
                        return Some((&self.query[..i], v))
                    }
                }

                // below code use iterative style to fetch for prefix

                // while let Some((key, value)) = bucket.next() {
                //     let bound = self.cursor + key.len();
                //     if bound <= self.query.len() && &self.query[self.cursor..bound] == &**key {
                //         return Some((&self.query[..bound], value))
                //     }
                // }
                return None
            }
        }
        None
    }
}

impl<'a, 'b, K, T, V> core::iter::FusedIterator for PrefixIterator<'a, 'b, K, T, V> 
where K: Copy + core::hash::Hash + PartialEq + PartialOrd + Sized,
      Box<[K]>: Clone + PartialEq,
      T: 'a + TrieNode<K, V>,
      V: Clone {

}