use {
kermit_algos::{JoinAlgo, JoinAlgorithm, JoinQuery, LeapfrogTriejoin},
kermit_ds::{ColumnTrie, IndexStructure, Relation, RelationFileExt, TreeTrie},
std::{collections::HashMap, path::Path},
};
/// Common interface of a named database that stores relations and answers
/// join queries over them.
///
/// Only `new` requires `Self: Sized`; every other method takes `self` by
/// reference, which is what lets `instantiate_database` hand the engine out
/// as a `Box<dyn DB>`.
pub trait DB {
/// Creates a database called `name`.
fn new(name: String) -> Self
where
Self: Sized;
/// Returns the name the database was created with.
fn name(&self) -> &String;
/// Registers a relation called `name` built from the given `arity`.
fn add_relation(&mut self, name: &str, arity: usize);
/// Inserts the single tuple `keys` into the relation `relation_name`.
///
/// NOTE(review): the signature offers no error channel; the
/// `DatabaseEngine` implementation panics when the relation is unknown.
fn add_keys(&mut self, relation_name: &str, keys: Vec<usize>);
/// Inserts every tuple of `keys` into the relation `relation_name`.
///
/// NOTE(review): same missing-relation behaviour as `add_keys`.
fn add_keys_batch(&mut self, relation_name: &str, keys: Vec<Vec<usize>>);
/// Evaluates `query` and returns the resulting tuples.
fn join(&self, query: kermit_algos::JoinQuery) -> Vec<Vec<usize>>;
/// Loads a relation from the file at `filepath` and adds it to the database.
///
/// # Errors
///
/// Returns a `std::io::Error` when the file cannot be turned into a relation.
fn add_file(&mut self, filepath: &Path) -> Result<(), std::io::Error>;
}
/// Database engine that is generic over how relations are stored and how
/// joins are evaluated.
///
/// * `R` is the storage type used for every relation in the engine.
/// * `JA` is the join algorithm that operates on `R`.
pub struct DatabaseEngine<R: Relation, JA: JoinAlgo<R>> {
    /// Name given to the database when it was constructed.
    name: String,
    /// Stored relations, keyed by relation name.
    relations: HashMap<String, R>,
    /// Zero-sized marker for the relation type parameter.
    phantom_rb: std::marker::PhantomData<R>,
    /// Zero-sized marker for the join algorithm; no `JA` value is ever stored.
    phantom_ja: std::marker::PhantomData<JA>,
}
/// [`DB`] implementation that keeps every relation in an in-memory map keyed
/// by relation name and delegates query evaluation to `JA`.
impl<R, JA> DB for DatabaseEngine<R, JA>
where
    R: Relation,
    JA: JoinAlgo<R>,
{
    /// Creates an engine called `name` that holds no relations yet.
    fn new(name: String) -> Self
    where
        Self: Sized,
    {
        DatabaseEngine {
            name,
            relations: HashMap::new(),
            phantom_rb: std::marker::PhantomData,
            phantom_ja: std::marker::PhantomData,
        }
    }

    /// Returns the name supplied at construction.
    fn name(&self) -> &String { &self.name }

    /// Builds a relation with `R::new` from `arity` and stores it under
    /// `name`, replacing any relation already stored under that name.
    fn add_relation(&mut self, name: &str, arity: usize) {
        let relation = R::new(arity.into());
        self.relations.insert(name.to_owned(), relation);
    }

    /// Inserts the tuple `keys` into the relation called `relation_name`.
    ///
    /// # Panics
    ///
    /// Panics, naming the relation, if `relation_name` is not in the database.
    fn add_keys(&mut self, relation_name: &str, keys: Vec<usize>) {
        self.relations
            .get_mut(relation_name)
            .unwrap_or_else(|| panic!("no relation named `{}` in database", relation_name))
            .insert(keys);
    }

    /// Inserts every tuple in `keys` into the relation called `relation_name`.
    ///
    /// # Panics
    ///
    /// Panics, naming the relation, if `relation_name` is not in the database.
    fn add_keys_batch(&mut self, relation_name: &str, keys: Vec<Vec<usize>>) {
        self.relations
            .get_mut(relation_name)
            .unwrap_or_else(|| panic!("no relation named `{}` in database", relation_name))
            .insert_all(keys);
    }

    /// Evaluates `query` with the join algorithm `JA` and collects the tuples
    /// it yields.
    ///
    /// # Panics
    ///
    /// Panics if a predicate in the query body names a relation that is not in
    /// the database.
    fn join(&self, query: JoinQuery) -> Vec<Vec<usize>> {
        // The algorithm only gets the relations the query mentions; a relation
        // referenced by several predicates is registered once.
        let mut ds_map: HashMap<String, &R> = HashMap::new();
        for pred in &query.body {
            let relation = self.relations.get(&pred.name).unwrap_or_else(|| {
                panic!("missing relation in DB for predicate `{}`", pred.name)
            });
            ds_map.entry(pred.name.clone()).or_insert(relation);
        }
        JA::join_iter(query, ds_map).collect()
    }

    /// Loads a relation from `filepath` and stores it under the name found in
    /// the loaded relation's header, replacing any relation with that name.
    ///
    /// The loader is chosen from the file extension, compared
    /// case-insensitively: `csv` or `parquet`.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::InvalidInput` if the extension is missing, not valid
    ///   UTF-8, or not one of the supported ones.
    /// * `ErrorKind::InvalidData` carrying the loader's message if the file
    ///   could not be loaded.
    fn add_file(&mut self, filepath: &Path) -> Result<(), std::io::Error> {
        let extension = filepath.extension().and_then(|s| s.to_str()).unwrap_or("");
        // Both loaders are reduced to a message here so that the conversion to
        // `io::Error` is written only once below.
        let loaded = match extension.to_lowercase().as_str() {
            | "csv" => R::from_csv(filepath).map_err(|e| e.to_string()),
            | "parquet" => R::from_parquet(filepath).map_err(|e| e.to_string()),
            | _ => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    format!("Unsupported file extension: {}", extension),
                ))
            },
        };
        let relation = loaded
            .map_err(|msg| std::io::Error::new(std::io::ErrorKind::InvalidData, msg))?;
        let relation_name = relation.header().name().to_string();
        self.relations.insert(relation_name, relation);
        Ok(())
    }
}
impl<R, JA> DatabaseEngine<R, JA>
where
R: Relation,
JA: JoinAlgo<R>,
{
pub fn new(name: String) -> Self { <Self as DB>::new(name) }
}
/// Builds a boxed database engine for the requested combination of index
/// structure and join algorithm.
///
/// The returned database is always named `"test"`.
pub fn instantiate_database(ds: IndexStructure, ja: JoinAlgorithm) -> Box<dyn DB> {
    let db_name = String::from("test");
    match ja {
        | JoinAlgorithm::LeapfrogTriejoin => match ds {
            | IndexStructure::TreeTrie => {
                Box::new(DatabaseEngine::<TreeTrie, LeapfrogTriejoin>::new(db_name))
            },
            | IndexStructure::ColumnTrie => {
                Box::new(DatabaseEngine::<ColumnTrie, LeapfrogTriejoin>::new(db_name))
            },
        },
    }
}
#[cfg(test)]
mod tests {
    use {
        super::*,
        kermit_algos::{JoinQuery, LeapfrogTriejoin},
        kermit_ds::TreeTrie,
    };

    /// Engine configuration exercised by every test in this module.
    type TestDb = DatabaseEngine<TreeTrie, LeapfrogTriejoin>;

    /// Smoke test: creating a relation and inserting one tuple must not panic.
    #[test]
    fn test_relation() {
        let mut db = TestDb::new(String::from("test"));
        let relation_name = "apple";
        db.add_relation(relation_name, 3);
        db.add_keys(relation_name, vec![1, 2, 3]);
    }

    /// Smoke test: joining two identical unary relations must not panic.
    /// The join result itself is not inspected.
    #[test]
    fn test_join() {
        let mut db = TestDb::new(String::from("test"));
        for relation_name in ["first", "second"] {
            db.add_relation(relation_name, 1);
            db.add_keys_batch(relation_name, vec![vec![1_usize], vec![2], vec![3]]);
        }
        let query = "Q(X) :- first(X), second(X).".parse::<JoinQuery>().unwrap();
        db.join(query);
    }
}