use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use crate::error::Result;
use crate::columnar::column::Column;
/// Kind of index associated with a column.
///
/// Within this file the value is only stored on an `Index` and handed back by
/// `Index::index_type`; none of the `Index` methods here branch on it.
// NOTE(review): the per-variant descriptions below are inferred from the
// variant names only — confirm against the code that consumes `IndexType`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexType {
/// Presumably an ordered (B-tree style) index.
BTree,
/// Presumably a hash-based, equality-lookup index.
Hash,
/// Presumably a bitmap index.
Bitmap,
/// Presumably an inverted index.
Inverted,
}
/// In-memory index over a single column, mapping each distinct value (as raw
/// bytes) to the row numbers at which that value occurs.
pub struct Index {
/// Name of the indexed column, as passed to `Index::new`.
column_name: String,
/// Index kind requested at construction time.
index_type: IndexType,
/// `<base_dir>/index_<column_name>` as computed by `Index::new`.
/// Not read by any method visible in this file (hence the leading underscore).
_base_dir: PathBuf,
/// Value bytes -> row numbers holding that value, filled by `Index::build`.
data: HashMap<Vec<u8>, Vec<u64>>,
}
impl Index {
    /// Creates an empty index for `column_name`.
    ///
    /// The stored directory is `base_dir` joined with `index_<column_name>`.
    /// Nothing is created on disk by this constructor.
    pub fn new(column_name: &str, index_type: IndexType, base_dir: &str) -> Self {
        Self {
            column_name: column_name.to_owned(),
            index_type,
            _base_dir: Path::new(base_dir).join(format!("index_{}", column_name)),
            data: HashMap::new(),
        }
    }

    /// Rebuilds the index from `column`, discarding any previous contents.
    ///
    /// Every row in `0..column.row_count()` for which `column.get_bytes`
    /// yields a value is recorded under that value's bytes. Rows yielding
    /// `None` are skipped. Row numbers under a key are stored in ascending
    /// order because rows are visited in order.
    ///
    /// # Errors
    ///
    /// The body as written always returns `Ok(())`.
    pub fn build(&mut self, column: &Column) -> Result<()> {
        self.data.clear();
        for row in 0..column.row_count() {
            match column.get_bytes(row) {
                Some(bytes) => self.data.entry(bytes.to_vec()).or_default().push(row),
                None => {}
            }
        }
        Ok(())
    }

    /// Returns the row numbers recorded for exactly `value`, or `None` when
    /// the value is not present in the index.
    pub fn lookup(&self, value: &[u8]) -> Option<&[u64]> {
        self.data.get(value).map(Vec::as_slice)
    }

    /// Returns, in ascending order and without duplicates, every row number
    /// whose key lies in the inclusive range `start..=end`.
    ///
    /// Keys are compared as byte slices (lexicographic byte order). Every key
    /// in the map is examined. An inverted range (`start > end`) matches
    /// nothing and yields an empty vector.
    pub fn lookup_range(&self, start: &[u8], end: &[u8]) -> Vec<u64> {
        let matching: HashSet<u64> = self
            .data
            .iter()
            .filter(|(key, _)| start <= key.as_slice() && key.as_slice() <= end)
            .flat_map(|(_, rows)| rows.iter().copied())
            .collect();

        let mut ordered: Vec<u64> = matching.into_iter().collect();
        // Values are unique after the set pass, so an unstable sort gives the
        // same ordering as a stable one.
        ordered.sort_unstable();
        ordered
    }

    /// Name of the indexed column.
    pub fn column_name(&self) -> &str {
        self.column_name.as_str()
    }

    /// Index kind supplied at construction time.
    pub fn index_type(&self) -> IndexType {
        self.index_type
    }

    /// Number of distinct values currently held in the index.
    pub fn cardinality(&self) -> usize {
        self.data.len()
    }

    /// Total number of row entries across all values.
    pub fn entry_count(&self) -> usize {
        self.data.values().map(Vec::len).sum()
    }
}