use {
crate::{
join_algo::JoinAlgo,
leapfrog_join::{LeapfrogJoinIter, LeapfrogJoinIterator},
},
kermit_iters::{LinearIterator, TrieIterable, TrieIterator, TrieIteratorWrapper},
kermit_parser::{JoinQuery, Term},
std::collections::HashMap,
};
pub trait LeapfrogTriejoinIterator: LeapfrogJoinIterator {
fn triejoin_open(&mut self) -> bool;
fn triejoin_up(&mut self) -> bool;
}
pub struct LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
arity: usize,
iterator_pool: Vec<Option<IT>>,
active_iter_indices: Vec<usize>,
variable_to_iter_map: Vec<Vec<usize>>,
depth: usize,
leapfrog: LeapfrogJoinIter<IT>,
}
impl<IT> LeapfrogJoinIterator for LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
fn leapfrog_next(&mut self) -> Option<usize> { self.leapfrog.leapfrog_next() }
fn key(&self) -> Option<usize> {
if self.depth == 0 {
None
} else {
self.leapfrog.key()
}
}
fn leapfrog_init(&mut self) -> bool { self.leapfrog.leapfrog_init() }
fn leapfrog_search(&mut self) -> bool { self.leapfrog.leapfrog_search() }
fn at_end(&self) -> bool {
if self.depth == 0 {
return true;
}
self.leapfrog.at_end()
}
fn leapfrog_seek(&mut self, seek_key: usize) -> bool { self.leapfrog.leapfrog_seek(seek_key) }
}
impl<IT> LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
pub fn new(
variable_ordering: Vec<usize>, predicate_variables: Vec<Vec<usize>>, iters: Vec<IT>,
) -> Self {
let mut variable_to_iter_map: Vec<Vec<usize>> = Vec::new();
for v in &variable_ordering {
let mut iters_at_this_depth: Vec<usize> = Vec::new();
for (predicate_i, predicate_vars) in predicate_variables.iter().enumerate() {
if predicate_vars.contains(v) {
iters_at_this_depth.push(predicate_i);
}
}
variable_to_iter_map.push(iters_at_this_depth);
}
let iterator_pool = iters.into_iter().map(Some).collect();
LeapfrogTriejoinIter {
iterator_pool,
active_iter_indices: Vec::new(),
variable_to_iter_map,
arity: variable_ordering.len(),
depth: 0,
leapfrog: LeapfrogJoinIter::new(vec![]),
}
}
fn update_iters(&mut self) {
while let Some(i) = self.active_iter_indices.pop() {
let iter = self
.leapfrog
.iterators
.pop()
.expect("There should always be an iterator here");
self.iterator_pool[i] = Some(iter);
}
if self.depth == 0 {
return;
}
let mut next_iters =
Vec::<IT>::with_capacity(self.variable_to_iter_map[self.depth - 1].len());
for i in &self.variable_to_iter_map[self.depth - 1] {
let iter = self.iterator_pool[*i]
.take()
.expect("There is an iterator here");
next_iters.push(iter);
self.active_iter_indices.push(*i);
}
self.leapfrog = LeapfrogJoinIter::new(next_iters);
}
}
impl<IT> LeapfrogTriejoinIterator for LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
fn triejoin_open(&mut self) -> bool {
if self.depth == self.arity {
return false;
}
self.depth += 1;
self.update_iters();
for iter in &mut self.leapfrog.iterators {
if !iter.open() {
return false;
}
}
self.leapfrog_init()
}
fn triejoin_up(&mut self) -> bool {
if self.depth == 0 {
return false;
}
for iter in &mut self.leapfrog.iterators {
assert!(
iter.up(),
"iterator must be able to move up from non-root depth (LFTJ invariant)"
);
}
self.depth -= 1;
self.update_iters();
true
}
}
impl<IT> TrieIterator for LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
fn open(&mut self) -> bool { self.triejoin_open() }
fn up(&mut self) -> bool { self.triejoin_up() }
}
impl<IT> LinearIterator for LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
fn key(&self) -> Option<usize> { LeapfrogJoinIterator::key(self) }
fn next(&mut self) -> Option<usize> { self.leapfrog_next() }
fn seek(&mut self, seek_key: usize) -> bool { self.leapfrog_seek(seek_key) }
fn at_end(&self) -> bool { LeapfrogJoinIterator::at_end(self) }
}
impl<IT> IntoIterator for LeapfrogTriejoinIter<IT>
where
IT: TrieIterator,
{
type IntoIter = TrieIteratorWrapper<Self>;
type Item = Vec<usize>;
fn into_iter(self) -> Self::IntoIter {
let arity = self.arity;
TrieIteratorWrapper::with_arity(self, arity)
}
}
fn build_variable_index(query: &JoinQuery) -> (Vec<usize>, Vec<Vec<usize>>) {
let mut var_to_index: HashMap<String, usize> = HashMap::new();
let mut next_index: usize = 0;
let register_var = |name: &str, map: &mut HashMap<String, usize>, next: &mut usize| {
*map.entry(name.to_string()).or_insert_with(|| {
let idx = *next;
*next += 1;
idx
})
};
for t in &query.head.terms {
if let Term::Var(ref vname) = t {
let _ = register_var(vname, &mut var_to_index, &mut next_index);
}
}
for pred in &query.body {
for t in &pred.terms {
if let Term::Var(ref vname) = t {
let _ = register_var(vname, &mut var_to_index, &mut next_index);
}
}
}
let variable_ordering: Vec<usize> = (0..var_to_index.len()).collect();
let mut predicate_variables: Vec<Vec<usize>> = Vec::with_capacity(query.body.len());
for pred in &query.body {
let mut vars_for_pred: Vec<usize> = Vec::new();
for t in &pred.terms {
if let Term::Var(ref vname) = t {
if let Some(idx) = var_to_index.get(vname) {
vars_for_pred.push(*idx);
}
}
}
predicate_variables.push(vars_for_pred);
}
(variable_ordering, predicate_variables)
}
pub struct LeapfrogTriejoin {}
impl<DS> JoinAlgo<DS> for LeapfrogTriejoin
where
DS: TrieIterable,
{
fn join_iter(
query: JoinQuery, datastructures: HashMap<String, &DS>,
) -> impl Iterator<Item = Vec<usize>> {
let (variable_ordering, predicate_variables) = build_variable_index(&query);
let trie_iters: Vec<_> = query
.body
.iter()
.map(|pred| {
let ds = datastructures
.get(&pred.name)
.expect("Missing datastructure for predicate name");
ds.trie_iter()
})
.collect();
LeapfrogTriejoinIter::new(variable_ordering, predicate_variables, trie_iters).into_iter()
}
}
#[cfg(test)]
mod tests {
use {
crate::{
leapfrog_join::LeapfrogJoinIterator,
leapfrog_triejoin::{LeapfrogTriejoinIter, LeapfrogTriejoinIterator},
},
kermit_ds::{Relation, TreeTrie},
kermit_iters::TrieIterable,
};
fn triejoin_collect(
variable_ordering: Vec<usize>, predicate_variables: Vec<Vec<usize>>,
relations: Vec<&TreeTrie>,
) -> Vec<Vec<usize>> {
let iters: Vec<_> = relations.iter().map(|r| r.trie_iter()).collect();
LeapfrogTriejoinIter::new(variable_ordering, predicate_variables, iters)
.into_iter()
.collect()
}
#[test]
fn test_classic() {
let t1 = TreeTrie::from_tuples(1.into(), vec![vec![1], vec![2], vec![3]]);
let t2 = TreeTrie::from_tuples(1.into(), vec![vec![1], vec![2], vec![3]]);
let t1_iter = t1.trie_iter();
let t2_iter = t2.trie_iter();
let mut triejoin_iter =
LeapfrogTriejoinIter::new(vec![0], vec![vec![0], vec![0]], vec![t1_iter, t2_iter]);
triejoin_iter.triejoin_open();
assert_eq!(triejoin_iter.key(), Some(1));
assert_eq!(triejoin_iter.leapfrog_next(), Some(2));
assert_eq!(triejoin_iter.leapfrog_next(), Some(3));
triejoin_iter.leapfrog_next();
assert!(triejoin_iter.at_end());
triejoin_iter.triejoin_up();
assert!(triejoin_iter.at_end());
let res = triejoin_iter.into_iter().collect::<Vec<_>>();
assert_eq!(res, vec![vec![1], vec![2], vec![3]]);
}
#[test]
fn more_complicated() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![7, 4]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![4, 1], vec![4, 4], vec![4, 5], vec![
4, 9,
]]);
let t = TreeTrie::from_tuples(2.into(), vec![vec![7, 2], vec![7, 3], vec![7, 5]]);
let r_iter = r.trie_iter();
let s_iter = s.trie_iter();
let t_iter = t.trie_iter();
let mut triejoin_iter = LeapfrogTriejoinIter::new(
vec![0, 1, 2],
vec![vec![0, 1], vec![1, 2], vec![0, 2]],
vec![r_iter, s_iter, t_iter],
);
triejoin_iter.triejoin_open();
assert_eq!(triejoin_iter.key().unwrap().clone(), 7);
triejoin_iter.leapfrog_next();
assert!(triejoin_iter.at_end());
triejoin_iter.triejoin_open();
assert_eq!(triejoin_iter.key().unwrap().clone(), 4);
triejoin_iter.leapfrog_next();
assert!(triejoin_iter.at_end());
triejoin_iter.triejoin_open();
assert_eq!(triejoin_iter.key().unwrap().clone(), 5);
}
#[test]
fn chain() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![2, 3]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![2, 4], vec![3, 5]]);
let t = TreeTrie::from_tuples(2.into(), vec![vec![4, 6], vec![5, 7]]);
let r_iter = r.trie_iter();
let s_iter = s.trie_iter();
let t_iter = t.trie_iter();
let mut triejoin_iter = LeapfrogTriejoinIter::new(
vec![0, 1, 2, 3],
vec![vec![0, 1], vec![1, 2], vec![2, 3]],
vec![r_iter, s_iter, t_iter],
);
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(1));
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(2));
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(4));
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(6));
assert!(triejoin_iter.triejoin_up());
assert!(triejoin_iter.triejoin_up());
assert!(triejoin_iter.triejoin_up());
assert_eq!(triejoin_iter.leapfrog_next(), Some(2));
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(3));
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(5));
assert!(triejoin_iter.triejoin_open());
assert_eq!(triejoin_iter.key(), Some(7));
}
#[test]
fn unary_intersection() {
let r = TreeTrie::from_tuples(1.into(), vec![vec![1], vec![2], vec![3]]);
let s = TreeTrie::from_tuples(1.into(), vec![vec![2], vec![3], vec![4]]);
assert_eq!(
triejoin_collect(vec![0], vec![vec![0], vec![0]], vec![&r, &s]),
vec![vec![2], vec![3]],
);
}
#[test]
fn unary_no_match() {
let r = TreeTrie::from_tuples(1.into(), vec![vec![1], vec![2]]);
let s = TreeTrie::from_tuples(1.into(), vec![vec![3], vec![4]]);
assert_eq!(
triejoin_collect(vec![0], vec![vec![0], vec![0]], vec![&r, &s]),
Vec::<Vec<usize>>::new(),
);
}
#[test]
fn unary_empty_relation() {
let r = TreeTrie::from_tuples(1.into(), vec![vec![1], vec![2], vec![3]]);
let s = TreeTrie::from_tuples(1.into(), vec![]);
assert_eq!(
triejoin_collect(vec![0], vec![vec![0], vec![0]], vec![&r, &s]),
Vec::<Vec<usize>>::new(),
);
}
#[test]
fn unary_single_match() {
let r = TreeTrie::from_tuples(1.into(), vec![vec![5]]);
let s = TreeTrie::from_tuples(1.into(), vec![vec![5]]);
assert_eq!(
triejoin_collect(vec![0], vec![vec![0], vec![0]], vec![&r, &s]),
vec![vec![5]],
);
}
#[test]
fn three_way_unary() {
let r = TreeTrie::from_tuples(1.into(), vec![vec![1], vec![2], vec![3], vec![4]]);
let s = TreeTrie::from_tuples(1.into(), vec![vec![2], vec![3], vec![5]]);
let t = TreeTrie::from_tuples(1.into(), vec![vec![3], vec![4], vec![5]]);
assert_eq!(
triejoin_collect(vec![0], vec![vec![0], vec![0], vec![0]], vec![&r, &s, &t]),
vec![vec![3]],
);
}
#[test]
fn binary_natural_join() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![1, 3]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![2, 4], vec![3, 5]]);
assert_eq!(
triejoin_collect(vec![0, 1, 2], vec![vec![0, 1], vec![1, 2]], vec![&r, &s]),
vec![vec![1, 2, 4], vec![1, 3, 5]],
);
}
#[test]
fn no_match_at_shared_variable() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![3, 4]]);
assert_eq!(
triejoin_collect(vec![0, 1, 2], vec![vec![0, 1], vec![0, 2]], vec![&r, &s]),
Vec::<Vec<usize>>::new(),
);
}
#[test]
fn triangle_join_collect() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![7, 4]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![4, 1], vec![4, 4], vec![4, 5], vec![
4, 9,
]]);
let t = TreeTrie::from_tuples(2.into(), vec![vec![7, 2], vec![7, 3], vec![7, 5]]);
assert_eq!(
triejoin_collect(
vec![0, 1, 2],
vec![vec![0, 1], vec![1, 2], vec![0, 2]],
vec![&r, &s, &t],
),
vec![vec![7, 4, 5]],
);
}
#[test]
fn star_join() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![1, 3], vec![2, 4]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![1, 5], vec![1, 6], vec![2, 7]]);
assert_eq!(
triejoin_collect(vec![0, 1, 2], vec![vec![0, 1], vec![0, 2]], vec![&r, &s]),
vec![
vec![1, 2, 5],
vec![1, 2, 6],
vec![1, 3, 5],
vec![1, 3, 6],
vec![2, 4, 7],
],
);
}
#[test]
fn self_join() {
let r1 = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![2, 1]]);
let r2 = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![2, 1]]);
assert_eq!(
triejoin_collect(vec![0, 1, 2], vec![vec![0, 1], vec![1, 2]], vec![&r1, &r2]),
vec![vec![1, 2, 1], vec![2, 1, 2]],
);
}
#[test]
fn four_way_chain() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![2, 3]]);
let t = TreeTrie::from_tuples(2.into(), vec![vec![3, 4]]);
let u = TreeTrie::from_tuples(2.into(), vec![vec![4, 5]]);
assert_eq!(
triejoin_collect(
vec![0, 1, 2, 3, 4],
vec![vec![0, 1], vec![1, 2], vec![2, 3], vec![3, 4]],
vec![&r, &s, &t, &u],
),
vec![vec![1, 2, 3, 4, 5]],
);
}
#[test]
fn single_relation_passthrough() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![3, 4]]);
assert_eq!(
triejoin_collect(vec![0, 1], vec![vec![0, 1]], vec![&r]),
vec![vec![1, 2], vec![3, 4]],
);
}
#[test]
fn column_trie_binary_join() {
use kermit_ds::ColumnTrie;
let r = ColumnTrie::from_tuples(2.into(), vec![vec![1, 2], vec![1, 3]]);
let s = ColumnTrie::from_tuples(2.into(), vec![vec![2, 4], vec![3, 5]]);
let r_iter = r.trie_iter();
let s_iter = s.trie_iter();
let result: Vec<Vec<usize>> =
LeapfrogTriejoinIter::new(vec![0, 1, 2], vec![vec![0, 1], vec![1, 2]], vec![
r_iter, s_iter,
])
.into_iter()
.collect();
assert_eq!(result, vec![vec![1, 2, 4], vec![1, 3, 5]]);
}
#[test]
fn binary_no_match_regression() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![3, 4]]);
assert_eq!(
triejoin_collect(vec![0, 1, 2], vec![vec![0, 1], vec![1, 2]], vec![&r, &s]),
Vec::<Vec<usize>>::new(),
);
}
#[test]
fn self_join_with_dead_ends() {
let r1 = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![2, 3], vec![3, 4]]);
let r2 = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![2, 3], vec![3, 4]]);
assert_eq!(
triejoin_collect(vec![0, 1, 2], vec![vec![0, 1], vec![1, 2]], vec![&r1, &r2]),
vec![vec![1, 2, 3], vec![2, 3, 4]],
);
}
#[test]
fn chain_join_collect() {
let r = TreeTrie::from_tuples(2.into(), vec![vec![1, 2], vec![2, 3]]);
let s = TreeTrie::from_tuples(2.into(), vec![vec![2, 4], vec![3, 5]]);
let t = TreeTrie::from_tuples(2.into(), vec![vec![4, 6], vec![5, 7]]);
assert_eq!(
triejoin_collect(
vec![0, 1, 2, 3],
vec![vec![0, 1], vec![1, 2], vec![2, 3]],
vec![&r, &s, &t],
),
vec![vec![1, 2, 4, 6], vec![2, 3, 5, 7]],
);
}
}