mycroft_support/
join.rs

1//! `join` implements a simple indexed join with the assumption of a global attribute order. This
2//! is likely not the optimal join, but it will get us started.
3/// Shorthand for a variable length tuple
4pub type Tuple = Vec<usize>;
5/// A `SkipIterator` is like a normal iterator, but:
6///
7/// * The arity of all returned `Tuple`s must be identical, and equal to the value returned by
8///   `arity()`
9/// * It can be moved to the first value above or equal to a tuple, meaning the iterator must be
10///   rewindable.
11pub trait SkipIterator {
12    /// Provides the next tuple in the iterator, the fact ID it was derived from
13    fn next(&mut self) -> Option<(Tuple, Vec<usize>)>;
14    /// Sets the iterator position to return the minimum value which is greater than or equal to
15    /// the provided min.
16    fn skip(&mut self, min: Tuple);
17    /// Returns the arity of the tuples that will be returned by `next()`
18    fn arity(&self) -> usize;
19    /// Returns the total length of the iterator, if it were fully rewound
20    fn len(&self) -> usize;
21}
22
23/// A field specifies a particular value in the join problem
24#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Hash)]
25pub struct Field {
26    /// `clause` refers to which iterator this value comes from, based on the order they were
27    /// provided to the `Join` constructor
28    pub clause: usize,
29    /// `field` indicates offset into the tuple referred to
30    pub field: usize,
31}
32
33/// A `Restrict` describes a way a particular `Field` can be limited in the join
34#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Hash)]
35pub enum Restrict {
36    /// The value at `Field` must equal the provided value
37    Const(usize),
38    /// The value at `Field` must equal all other values which were restricted by being `Unify`'d
39    /// to the same provided key
40    Unify(usize),
41}
42
43impl Restrict {
44    // Checks whether a restriction is met, updating the candidate if it has a new variable
45    // definition
46    fn check(&self, candidate: &mut Vec<usize>, val: usize, order: &[usize]) -> bool {
47        match *self {
48            Restrict::Const(v) => v == val,
49            Restrict::Unify(var) => if order[var] < candidate.len() {
50                candidate[order[var]] == val
51            } else if order[var] == candidate.len() {
52                candidate.push(val);
53                true
54            } else {
55                panic!("Out of order variable numbering")
56            },
57        }
58    }
59    // Provides the minimum legal value for the field, assuming the candidate we have partially
60    // locked in.
61    fn min(&self, candidate: &Vec<usize>, order: &[usize]) -> usize {
62        match *self {
63            Restrict::Const(v) => v,
64            Restrict::Unify(var) => {
65                let var_xlat = order[var];
66                if var_xlat < candidate.len() {
67                    candidate[var_xlat]
68                } else {
69                    0
70                }
71            }
72        }
73    }
74}
75
76/// A join iterator, made of multiple `SkipIterators`, combined with the join condition.
77pub struct Join<'a> {
78    // Order we're going to solve in
79    order: Vec<usize>,
80    // Based on the reorder of predicates, how to read the restricts as candidate idx
81    var_old_to_new: Vec<usize>,
82    indices: Vec<&'a mut SkipIterator>,
83    restricts: &'a Vec<Vec<Option<Restrict>>>,
84    // Currently selected variable assignment
85    candidate: Vec<usize>,
86    // Stack of previous variable assignment lengths so we can rewind choices
87    candidate_len: Vec<usize>,
88    // Stack of fact ids in use
89    fids: Vec<Vec<usize>>,
90}
91
92fn min_possible(
93    candidate: &Vec<usize>,
94    restricts: &Vec<Option<Restrict>>,
95    order: &[usize],
96) -> Vec<usize> {
97    let mut out = Vec::new();
98    for mr in restricts.iter() {
99        match *mr {
100            None => out.push(0),
101            Some(ref r) => out.push(r.min(candidate, order)),
102        }
103    }
104    out
105}
106
107// Provide an ordering for walking the SkipIterators that will do the smallest index at the
108// outside, and move in from there
109fn reorder_indices(indices: &Vec<&mut SkipIterator>) -> Vec<usize> {
110    let mut lens: Vec<(usize, usize)> = indices.iter().map(|x| x.len()).enumerate().collect();
111    lens.sort_by_key(|x| x.1);
112    lens.into_iter().map(|x| x.0).collect()
113}
114
115// Based on the ordering of iterators, renumber the variables
116fn reorder_vars(order: &[usize], restricts: &[Vec<Option<Restrict>>]) -> Vec<usize> {
117    let max_var_len: usize = restricts
118        .iter()
119        .flat_map(|pred| pred.iter().flat_map(|mr| mr.iter()))
120        .filter_map(|r| if let Restrict::Unify(var) = *r {
121            Some(var)
122        } else {
123            None
124        })
125        .max()
126        // If we have a max_var, we need one more than it for space
127        .map(|x| x + 1)
128        // If we have no restricts, we have no variables - this could happen for a constant query
129        .unwrap_or(0);
130    let mut new_to_old = Vec::new();
131    let mut old_to_new = vec![0; max_var_len];
132    for idx in order {
133        for mr in &restricts[*idx] {
134            if let Some(Restrict::Unify(var)) = *mr {
135                if !new_to_old.contains(&var) {
136                    old_to_new[var] = new_to_old.len();
137                    new_to_old.push(var);
138                }
139            }
140        }
141    }
142    old_to_new
143}
144
145impl<'a> Join<'a> {
146    /// Creates a new join iterator:
147    ///
148    /// * `indices` will be walked in the order provided, so if you know you have a small one, try to
149    ///   put it first
150    /// * `restricts` must use a tight, ascending variable ordering, that is:
151    ///
152    ///   * If field0 < field1, and both map to `Unify(var0)` and `Unify(var1)` respectively, var0
153    ///     < var1
154    ///   * If `Unify(var)` is present in the map, and var0 < var, then `Unify(var0)` is present in
155    ///     the map
156    pub fn new(
157        indices: Vec<&'a mut SkipIterator>,
158        restricts: &'a Vec<Vec<Option<Restrict>>>,
159    ) -> Self {
160        let order = reorder_indices(&indices);
161        let var_old_to_new = reorder_vars(&order, restricts);
162        let mut join = Join {
163            order: order,
164            var_old_to_new: var_old_to_new,
165            indices: indices,
166            restricts: restricts,
167            candidate: Vec::new(),
168            candidate_len: Vec::new(),
169            fids: Vec::new(),
170        };
171        // We need to .right() once before starting to initialize the leftmost iterator properly
172        join.right();
173        join
174    }
175
176    // Moves one iterator left, e.g. because we have no more possibilities on this one
177    fn left(&mut self) {
178        self.fids.pop();
179        self.fids.pop();
180        self.candidate.truncate(self.candidate_len.pop().unwrap());
181        self.candidate.truncate(self.candidate_len.pop().unwrap());
182    }
183
184    // Moves one iterator right, e.g. this one matched, and we need to finish filling the candidate
185    // and checking restrictions
186    fn right(&mut self) {
187        let n = self.order[self.candidate_len.len()];
188        self.indices[n].skip(min_possible(
189            &self.candidate,
190            &self.restricts[n],
191            &self.var_old_to_new,
192        ));
193    }
194}
195impl<'a> Iterator for Join<'a> {
196    type Item = (Tuple, Vec<Vec<usize>>);
197    fn next(&mut self) -> Option<Self::Item> {
198        // Join invariants:
199        // 1.) candidate_len.len() indicates the "current" index
200        // 2.) All indices less than the current index are coherent, and their candidate values
201        //     are in candidate
202        // 3.) The current index is at least past the minimum possible advancement level
203        //     (this needs to be set up in new())
204        'states: loop {
205            let n = self.order[self.candidate_len.len()];
206            self.candidate_len.push(self.candidate.len());
207            match self.indices[n].next() {
208                Some((tup, fid)) => {
209                    self.fids.push(fid);
210                    for (f, v) in tup.iter().enumerate() {
211                        if let Some(r) = self.restricts[n][f] {
212                            if !r.check(&mut self.candidate, *v, &self.var_old_to_new) {
213                                if n == self.order[0] {
214                                    self.candidate.truncate(self.candidate_len.pop().unwrap());
215                                    self.fids.pop();
216                                    return None;
217                                }
218                                if f == 0 {
219                                    self.left();
220                                } else {
221                                    let mut min = min_possible(
222                                        &self.candidate,
223                                        &self.restricts[n],
224                                        &self.var_old_to_new,
225                                    );
226                                    if min[f] <= tup[f] {
227                                        // We've passed the min, step the previous if possible
228                                        if f != 0 {
229                                            min[f - 1] += 1;
230                                        } else {
231                                            // If the first element has gone valid -> invalid,
232                                            // it's time to go to the previous clause
233                                            self.left();
234                                            continue 'states;
235                                        }
236                                    }
237                                    self.indices[n].skip(min);
238                                    self.candidate.truncate(self.candidate_len.pop().unwrap());
239                                    self.fids.pop();
240                                }
241                                continue 'states;
242                            }
243                        }
244                    }
245
246                    if self.candidate_len.len() == self.indices.len() {
247                        // We have a complete candidate
248                        let mut out = Vec::new();
249                        for idx in &self.var_old_to_new {
250                            out.push(self.candidate[*idx])
251                        }
252                        let mut fids = self.fids.clone();
253                        for (new, old) in self.order.iter().enumerate() {
254                            fids[*old] = self.fids[new].clone();
255                        }
256                        self.candidate.truncate(self.candidate_len.pop().unwrap());
257                        self.fids.pop();
258
259                        return Some((out, fids));
260                    }
261                    // We're not done yet
262                    self.right();
263                }
264                None => {
265                    if n == self.order[0] {
266                        self.candidate.truncate(self.candidate_len.pop().unwrap());
267                        return None;
268                    }
269                    // TODO: left() abstraction broken with new fids, this hacks around it
270                    self.fids.push(vec![]);
271                    self.left();
272                }
273            }
274        }
275    }
276}
277
278#[cfg(test)]
279mod test {
280    /// `TrivialIterator` provides a sort+dedup implementation of a `SkipIterator`.
281    /// It will be slower than traditional indexes in essentailly all cases - it is _not_ the
282    /// equivalent of a full tablescan, that is just an iterator with .skip() no-opped.
283    /// It's provided to allow tests to separate from index implementation/initialization.
284    pub struct TrivialIterator {
285        payload: Vec<Vec<usize>>,
286        loc: usize,
287    }
288
289    impl TrivialIterator {
290        /// Consumes a list of tuples and produces a `SkipIterator` for them
291        pub fn new(mut payload: Vec<Vec<usize>>) -> Self {
292            payload.sort();
293            payload.dedup();
294            TrivialIterator {
295                payload: payload,
296                loc: 0,
297            }
298        }
299    }
300
301    impl SkipIterator for TrivialIterator {
302        fn next(&mut self) -> Option<(Tuple, Vec<usize>)> {
303            if self.loc < self.payload.len() {
304                self.loc += 1;
305                Some((self.payload[self.loc - 1].clone(), vec![self.loc - 1]))
306            } else {
307                None
308            }
309        }
310        fn skip(&mut self, min: Tuple) {
311            self.loc = 0;
312            while (self.payload.len() > self.loc) && (self.payload[self.loc] < min) {
313                self.loc = self.loc + 1;
314            }
315        }
316        fn arity(&self) -> usize {
317            self.payload[0].len()
318        }
319        fn len(&self) -> usize {
320            self.payload.len()
321        }
322    }
323    use super::*;
324
325    #[test]
326    fn simple_join() {
327        use self::Restrict::*;
328        let mut p = TrivialIterator::new(vec![vec![0, 2, 3], vec![3, 2, 5], vec![4, 4, 4]]);
329        let mut q = TrivialIterator::new(vec![vec![1, 3], vec![2, 7], vec![3, 4], vec![8, 2]]);
330        let restricts = vec![
331            vec![Some(Unify(0)), Some(Unify(1)), None],
332            vec![Some(Unify(1)), Some(Const(7))],
333        ];
334        let its: Vec<&mut SkipIterator> = vec![&mut p, &mut q];
335        let mut join = Join::new(its, &restricts);
336        assert_eq!(join.next().map(|x| x.0), Some(vec![0, 2]));
337        assert_eq!(join.next().map(|x| x.0), Some(vec![3, 2]));
338        assert_eq!(join.next().map(|x| x.0), None);
339        assert_eq!(join.next().map(|x| x.0), None);
340    }
341
342    #[test]
343    fn reorder_vars_flip() {
344        use self::Restrict::*;
345        let order = &[0, 1];
346        let order_flip = &[1, 0];
347        let restrict_flip = vec![
348            vec![Some(Unify(0)), Some(Unify(1))],
349            vec![Some(Unify(1)), Some(Unify(0))],
350        ];
351        assert_eq!(&reorder_vars(order, &restrict_flip), &[0, 1]);
352        assert_eq!(&reorder_vars(order_flip, &restrict_flip), &[1, 0]);
353    }
354
355    // Derived from actual data causing a mistake in reordering, that's why the inputs are a bit
356    // arcane
357    #[test]
358    fn reorder_real() {
359        use self::Restrict::*;
360        let mut p = TrivialIterator::new(vec![vec![16, 16], vec![18, 18], vec![26, 26]]);
361        let mut q = TrivialIterator::new(vec![vec![16, 88], vec![18, 89]]);
362        let restricts = vec![
363            vec![Some(Unify(0)), Some(Unify(1))],
364            vec![Some(Unify(1)), Some(Unify(2))],
365        ];
366        let its: Vec<&mut SkipIterator> = vec![&mut p, &mut q];
367        let join = Join::new(its, &restricts);
368        assert_eq!(join.collect::<Vec<_>>().len(), 2)
369    }
370
371    // Evidently the first one overminimized - it still found a bug, but there's more!
372    #[test]
373    fn reorder_real2() {
374        use self::Restrict::*;
375        let mut p = TrivialIterator::new(vec![vec![16, 16], vec![16, 88]]);
376
377        let mut q = TrivialIterator::new(vec![vec![88, 15]]);
378        let restricts = vec![
379            vec![Some(Unify(0)), Some(Unify(1))],
380            vec![Some(Unify(1)), Some(Unify(2))],
381        ];
382        let its: Vec<&mut SkipIterator> = vec![&mut p, &mut q];
383        let join = Join::new(its, &restricts);
384        assert_eq!(join.collect::<Vec<_>>().len(), 1)
385    }
386}