mod utils;
use std::convert::TryInto;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
const TRIE_DEPTH_THRESHOLD: usize = 8;
const BTREE_MIN_OCCUPANCY: f32 = 0.4;
const NODE_SIZE: usize = 256;
const BTREE_MAX_KEYS: usize = NODE_SIZE / 2;
pub trait ToBytes {
fn to_bytes(&self) -> Vec<u8>;
}
impl ToBytes for String {
fn to_bytes(&self) -> Vec<u8> {
self.as_bytes().to_vec()
}
}
impl ToBytes for str {
fn to_bytes(&self) -> Vec<u8> {
self.as_bytes().to_vec()
}
}
impl ToBytes for [u8] {
fn to_bytes(&self) -> Vec<u8> {
self.to_vec()
}
}
impl ToBytes for Vec<u8> {
fn to_bytes(&self) -> Vec<u8> {
self.clone()
}
}
macro_rules! impl_to_bytes_for_int {
($($t:ty),*) => {
$(
impl ToBytes for $t {
fn to_bytes(&self) -> Vec<u8> {
self.to_be_bytes().to_vec()
}
}
)*
}
}
impl_to_bytes_for_int!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128);
pub trait FromBytes: Sized {
fn from_bytes(bytes: &[u8]) -> Option<Self>;
}
impl FromBytes for String {
fn from_bytes(bytes: &[u8]) -> Option<Self> {
String::from_utf8(bytes.to_vec()).ok()
}
}
impl FromBytes for Vec<u8> {
fn from_bytes(bytes: &[u8]) -> Option<Self> {
Some(bytes.to_vec())
}
}
macro_rules! impl_from_bytes_for_int {
($($t:ty),*) => {
$(
impl FromBytes for $t {
fn from_bytes(bytes: &[u8]) -> Option<Self> {
if bytes.len() == std::mem::size_of::<$t>() {
let array = bytes.try_into().ok()?;
Some(Self::from_be_bytes(array))
} else {
None
}
}
}
)*
}
}
impl_from_bytes_for_int!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128);
enum NodeType<K: Clone + Ord, V> {
Trie(TrieNode<K, V>),
BTree(BTreeNode<K, V>),
}
struct SplitInfo<K: Clone + Ord + ToBytes, V: Clone> {
median_key: K,
median_value: V,
right_node: Arc<RwLock<NodeType<K, V>>>,
}
struct TrieNode<K: Clone + Ord, V> {
children: Vec<Option<Arc<RwLock<NodeType<K, V>>>>>,
value: Option<V>,
depth: usize,
size: AtomicUsize,
}
struct BTreeNode<K: Clone + Ord, V> {
keys: Vec<K>,
values: Vec<V>,
children: Vec<Arc<RwLock<NodeType<K, V>>>>,
is_leaf: bool,
}
pub struct ASTrie<K: Clone + Ord, V> {
root: Arc<RwLock<NodeType<K, V>>>,
size: AtomicUsize,
}
impl<K: Clone + Ord + ToBytes + FromBytes, V: Clone> ASTrie<K, V> {
pub fn new() -> Self {
ASTrie {
root: Arc::new(RwLock::new(NodeType::Trie(TrieNode {
children: vec![None; 256], value: None,
depth: 0,
size: AtomicUsize::new(0),
}))),
size: AtomicUsize::new(0),
}
}
pub fn get(&self, key: &K) -> Option<V> {
let key_bytes: Vec<u8> = key.to_bytes();
let mut current: Arc<RwLock<NodeType<K, V>>> = self.root.clone();
loop {
let next_node: Option<Arc<RwLock<NodeType<K, V>>>> = {
let node: RwLockReadGuard<'_, NodeType<K, V>> = current.read().unwrap();
match &*node {
NodeType::Trie(trie_node) => {
if trie_node.depth == key_bytes.len() {
return trie_node.value.clone();
}
let current_byte: usize = key_bytes[trie_node.depth] as usize;
match &trie_node.children[current_byte] {
Some(child) => Some(child.clone()),
None => None,
}
}
NodeType::BTree(btree_node) => {
if btree_node.is_leaf {
match btree_node.keys.binary_search(key) {
Ok(idx) => return Some(btree_node.values[idx].clone()),
Err(_) => return None,
}
} else {
let child_idx: usize = match btree_node.keys.binary_search(key) {
Ok(idx) => idx + 1,
Err(idx) => idx,
};
Some(btree_node.children[child_idx].clone())
}
}
}
};
match next_node {
Some(next) => current = next,
None => return None,
}
}
}
pub fn range(&self, start: &K, end: &K) -> Vec<(K, V)> {
let mut result: Vec<(K, V)> = Vec::new();
if start > end {
return result;
}
let root: RwLockReadGuard<'_, NodeType<K, V>> = self.root.read().unwrap();
match &*root {
NodeType::Trie(trie_node) => {
utils::collect_trie_range(trie_node, Vec::new(), start, end, &mut result);
}
NodeType::BTree(btree_node) => {
utils::collect_btree_range(btree_node, start, end, &mut result);
}
}
result.sort_by(|a, b| a.0.cmp(&b.0));
result
}
pub fn insert(&self, key: K, value: V) -> Option<V> {
let mut current: Arc<RwLock<NodeType<K, V>>> = self.root.clone();
let mut old_value: Option<V> = None;
let mut path: Vec<(Arc<RwLock<NodeType<K, V>>>, usize)> = Vec::new();
loop {
let next_node: Arc<RwLock<NodeType<K, V>>> = {
let mut node: RwLockWriteGuard<'_, NodeType<K, V>> = current.write().unwrap();
match &mut *node {
NodeType::Trie(trie_node) => {
if trie_node.depth >= TRIE_DEPTH_THRESHOLD {
let new_btree: NodeType<K, V> = utils::convert_to_btree(trie_node);
*node = new_btree;
continue; }
let key_bytes: Vec<u8> = utils::key_to_bytes(&key);
let current_byte: u8 = key_bytes.get(trie_node.depth).copied().unwrap_or(0);
if trie_node.depth == key_bytes.len() {
old_value = trie_node.value.replace(value);
break;
}
if trie_node.children[current_byte as usize].is_none() {
trie_node.children[current_byte as usize] =
Some(Arc::new(RwLock::new(NodeType::Trie(TrieNode {
children: vec![None; 256],
value: None,
depth: trie_node.depth + 1,
size: AtomicUsize::new(0),
}))));
}
trie_node.children[current_byte as usize]
.as_ref()
.unwrap()
.clone()
}
NodeType::BTree(btree_node) => {
if btree_node.is_leaf {
match btree_node.keys.binary_search(&key) {
Ok(idx) => {
old_value =
Some(std::mem::replace(&mut btree_node.values[idx], value));
break;
}
Err(idx) => {
btree_node.keys.insert(idx, key.clone());
btree_node.values.insert(idx, value.clone());
if btree_node.keys.len() > BTREE_MAX_KEYS {
let split_info: Option<SplitInfo<K, V>> =
utils::split_btree_node(btree_node);
if let Some(split_info) = split_info {
if path.is_empty() {
let new_root: BTreeNode<K, V> = BTreeNode {
keys: vec![split_info.median_key],
values: vec![split_info.median_value],
children: vec![
current.clone(),
split_info.right_node,
],
is_leaf: false,
};
*node = NodeType::BTree(new_root);
} else {
let (parent, child_idx) = path.pop().unwrap();
let mut parent: RwLockWriteGuard<
'_,
NodeType<K, V>,
> = parent.write().unwrap();
if let NodeType::BTree(parent_node) = &mut *parent {
utils::handle_split(
parent_node,
child_idx,
split_info,
);
}
}
}
} else if ((btree_node.keys.len() as f32) / (NODE_SIZE as f32))
< BTREE_MIN_OCCUPANCY
{
*node = utils::convert_to_trie(btree_node);
}
break;
}
}
} else {
let child_idx: usize = match btree_node.keys.binary_search(&key) {
Ok(idx) => idx + 1,
Err(idx) => idx,
};
btree_node.children[child_idx].clone()
}
}
}
};
current = next_node;
}
self.size.fetch_add(1, Ordering::Relaxed);
old_value
}
pub fn update(&self, key: &K, new_value: V) -> Option<V> {
let key_bytes: Vec<u8> = key.to_bytes();
let mut current: Arc<RwLock<NodeType<K, V>>> = self.root.clone();
loop {
let next_node: Option<Arc<RwLock<NodeType<K, V>>>> = {
let mut node: RwLockWriteGuard<'_, NodeType<K, V>> = current.write().unwrap();
match &mut *node {
NodeType::Trie(trie_node) => {
if trie_node.depth == key_bytes.len() {
return trie_node.value.replace(new_value);
}
let current_byte = key_bytes[trie_node.depth] as usize;
match &trie_node.children[current_byte] {
Some(child) => Some(child.clone()),
None => None,
}
}
NodeType::BTree(btree_node) => {
if btree_node.is_leaf {
match btree_node.keys.binary_search(key) {
Ok(idx) => {
return Some(std::mem::replace(
&mut btree_node.values[idx],
new_value,
));
}
Err(_) => return None,
}
} else {
let child_idx: usize = match btree_node.keys.binary_search(key) {
Ok(idx) => idx + 1,
Err(idx) => idx,
};
Some(btree_node.children[child_idx].clone())
}
}
}
};
match next_node {
Some(next) => current = next,
None => return None,
}
}
}
pub fn delete(&self, key: &K) -> Option<V> {
let key_bytes = key.to_bytes();
let mut current = self.root.clone();
let mut path: Vec<(Arc<RwLock<NodeType<K, V>>>, usize)> = Vec::new();
loop {
let next_node: Option<Arc<RwLock<NodeType<K, V>>>> = {
let node: RwLockReadGuard<'_, NodeType<K, V>> = current.read().unwrap();
match &*node {
NodeType::Trie(trie_node) => {
if trie_node.depth == key_bytes.len() {
break; }
let current_byte: usize = key_bytes[trie_node.depth] as usize;
match &trie_node.children[current_byte] {
Some(child) => {
path.push((current.clone(), current_byte));
Some(child.clone())
}
None => None,
}
}
NodeType::BTree(btree_node) => {
if btree_node.is_leaf {
break; } else {
let child_idx: usize = match btree_node.keys.binary_search(key) {
Ok(idx) => idx + 1,
Err(idx) => idx,
};
path.push((current.clone(), child_idx));
Some(btree_node.children[child_idx].clone())
}
}
}
};
match next_node {
Some(next) => current = next,
None => return None,
}
}
#[allow(unused_assignments)]
let mut value_to_return: Option<V> = None;
{
let mut node: RwLockWriteGuard<'_, NodeType<K, V>> = current.write().unwrap();
match &mut *node {
NodeType::Trie(trie_node) => {
value_to_return = trie_node.value.take();
trie_node.size.fetch_sub(1, Ordering::Relaxed);
}
NodeType::BTree(btree_node) => {
match btree_node.keys.binary_search(key) {
Ok(idx) => {
value_to_return = Some(btree_node.values.remove(idx));
btree_node.keys.remove(idx);
if !btree_node.is_leaf && btree_node.keys.len() < BTREE_MAX_KEYS / 4 {
utils::merge_btree_nodes(btree_node, &path);
}
}
Err(_) => return None,
}
}
}
}
if let Some(_value) = value_to_return.as_ref() {
utils::cleanup_empty_trie_nodes(&path);
self.size.fetch_sub(1, Ordering::Relaxed);
}
value_to_return
}
}
#[cfg(test)]
mod tests {
use super::*;
fn create_test_trie_node<K: Clone + Ord + ToBytes + FromBytes, V: Clone>(
value: Option<V>,
depth: usize,
) -> TrieNode<K, V> {
TrieNode {
children: vec![None; 256],
value,
depth,
size: AtomicUsize::new(0),
}
}
fn create_test_btree_node<K: Clone + Ord + ToBytes + FromBytes, V: Clone>(
keys: Vec<K>,
values: Vec<V>,
is_leaf: bool,
) -> BTreeNode<K, V> {
BTreeNode {
keys,
values,
children: Vec::new(),
is_leaf,
}
}
#[test]
fn test_collect_trie_range() {
let mut result: Vec<(String, i32)> = Vec::new();
let mut node: TrieNode<String, i32> = create_test_trie_node::<String, i32>(Some(1), 0);
let child: TrieNode<String, i32> = create_test_trie_node(Some(2), 1);
node.children[97] = Some(Arc::new(RwLock::new(NodeType::Trie(child))));
utils::collect_trie_range(
&node,
Vec::new(),
&"a".to_string(),
&"c".to_string(),
&mut result,
);
assert_eq!(result.len(), 1);
assert!(result.iter().any(|(k, v)| k == "a" && *v == 2));
}
#[test]
fn test_collect_btree_range() {
let mut result: Vec<(i32, String)> = Vec::new();
let node: BTreeNode<i32, String> = create_test_btree_node(
vec![1, 3, 5],
vec!["one".to_string(), "three".to_string(), "five".to_string()],
true,
);
utils::collect_btree_range(&node, &1, &4, &mut result);
assert_eq!(result.len(), 2);
assert_eq!(result[0], (1, "one".to_string()));
assert_eq!(result[1], (3, "three".to_string()));
}
#[test]
fn test_insert_into_trie() {
let mut root: TrieNode<String, i32> = create_test_trie_node::<String, i32>(None, 0);
let key: String = "test".to_string();
let value: i32 = 42;
let key_bytes: &[u8] = key.as_bytes();
utils::insert_into_trie(&mut root, &key, value, 0, key_bytes);
fn verify_path(
node: &Option<Arc<RwLock<NodeType<String, i32>>>>,
remaining_bytes: &[u8],
expected_value: i32,
) -> bool {
match node {
None => false,
Some(child) => {
if let Ok(guard) = child.read() {
match &*guard {
NodeType::Trie(trie_node) => {
if remaining_bytes.is_empty() {
return trie_node.value == Some(expected_value);
}
let next_byte = remaining_bytes[0] as usize;
verify_path(
&trie_node.children[next_byte],
&remaining_bytes[1..],
expected_value,
)
}
_ => false,
}
} else {
false
}
}
}
}
let first_byte: usize = key_bytes[0] as usize;
assert!(verify_path(
&root.children[first_byte],
&key_bytes[1..],
value
));
}
#[test]
fn test_collect_leaf_pairs() {
let mut pairs: Vec<(i32, String)> = Vec::new();
let leaf1: BTreeNode<i32, String> =
create_test_btree_node(vec![1, 2], vec!["one".to_string(), "two".to_string()], true);
let leaf2: BTreeNode<i32, String> = create_test_btree_node(
vec![3, 4],
vec!["three".to_string(), "four".to_string()],
true,
);
let mut parent: BTreeNode<i32, String> =
create_test_btree_node(vec![3], vec!["three".to_string()], false);
parent.children = vec![
Arc::new(RwLock::new(NodeType::BTree(leaf1))),
Arc::new(RwLock::new(NodeType::BTree(leaf2))),
];
utils::collect_leaf_pairs(&parent, &mut pairs);
assert_eq!(pairs.len(), 4);
assert_eq!(pairs[0], (1, "one".to_string()));
assert_eq!(pairs[3], (4, "four".to_string()));
}
#[test]
fn test_handle_split() {
let mut parent: BTreeNode<i32, String> = BTreeNode {
keys: vec![1, 5],
values: vec!["one".to_string(), "five".to_string()],
children: vec![
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![0],
values: vec!["zero".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![2],
values: vec!["two".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![6],
values: vec!["six".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
],
is_leaf: false,
};
let right_node: Arc<RwLock<NodeType<i32, String>>> =
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![4],
values: vec!["four".to_string()],
children: Vec::new(),
is_leaf: true,
})));
let split_info: SplitInfo<i32, String> = SplitInfo {
median_key: 3,
median_value: "three".to_string(),
right_node: right_node.clone(),
};
utils::handle_split(&mut parent, 1, split_info);
assert_eq!(parent.keys, vec![1, 3, 5], "Keys should be [1, 3, 5]");
assert_eq!(
parent.values,
vec!["one".to_string(), "three".to_string(), "five".to_string()],
"Values should be [one, three, five]"
);
assert_eq!(
parent.children.len(),
4,
"Should have 4 children after split"
);
fn verify_child_node(
child: &Arc<RwLock<NodeType<i32, String>>>,
expected_keys: Vec<i32>,
expected_values: Vec<String>,
) -> bool {
if let Ok(guard) = child.read() {
match &*guard {
NodeType::BTree(node) => {
node.keys == expected_keys && node.values == expected_values
}
_ => false,
}
} else {
false
}
}
assert!(
verify_child_node(&parent.children[0], vec![0], vec!["zero".to_string()]),
"First child should have key [0]"
);
assert!(
verify_child_node(&parent.children[1], vec![2], vec!["two".to_string()]),
"Second child should have key [2]"
);
assert!(
verify_child_node(&parent.children[2], vec![4], vec!["four".to_string()]),
"Third child should have key [4]"
);
assert!(
verify_child_node(&parent.children[3], vec![6], vec!["six".to_string()]),
"Fourth child should have key [6]"
);
}
#[test]
fn test_split_btree_node() {
let mut leaf: BTreeNode<i32, String> = BTreeNode {
keys: vec![1, 2, 3, 4, 5],
values: vec![
"1".to_string(),
"2".to_string(),
"3".to_string(),
"4".to_string(),
"5".to_string(),
],
children: Vec::new(),
is_leaf: true,
};
let split_info: Option<SplitInfo<i32, String>> = utils::split_btree_node(&mut leaf);
assert!(split_info.is_some());
let info: SplitInfo<i32, String> = split_info.unwrap();
assert_eq!(leaf.keys, vec![1, 2]);
assert_eq!(leaf.values, vec!["1".to_string(), "2".to_string()]);
if let Ok(guard) = info.right_node.read() {
match &*guard {
NodeType::BTree(right) => {
assert_eq!(right.keys, vec![3, 4, 5]);
assert_eq!(
right.values,
vec!["3".to_string(), "4".to_string(), "5".to_string()]
);
assert!(right.is_leaf);
}
_ => panic!("Expected BTree node"),
}
}
assert_eq!(info.median_key, 3);
assert_eq!(info.median_value, "3".to_string());
let mut internal: BTreeNode<i32, String> = BTreeNode {
keys: vec![10, 20, 30, 40],
values: vec![
"10".to_string(),
"20".to_string(),
"30".to_string(),
"40".to_string(),
],
children: vec![
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![5],
values: vec!["5".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![15],
values: vec!["15".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![25],
values: vec!["25".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![35],
values: vec!["35".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
Arc::new(RwLock::new(NodeType::BTree(BTreeNode {
keys: vec![45],
values: vec!["45".to_string()],
children: Vec::new(),
is_leaf: true,
}))),
],
is_leaf: false,
};
let split_info: Option<SplitInfo<i32, String>> = utils::split_btree_node(&mut internal);
assert!(split_info.is_some());
let info: SplitInfo<i32, String> = split_info.unwrap();
assert_eq!(internal.keys, vec![10, 20]);
assert_eq!(internal.values, vec!["10".to_string(), "20".to_string()]);
assert_eq!(internal.children.len(), 3);
if let Ok(guard) = info.right_node.read() {
match &*guard {
NodeType::BTree(right) => {
assert_eq!(right.keys, vec![40]);
assert_eq!(right.values, vec!["40".to_string()]);
assert_eq!(right.children.len(), 2);
assert!(!right.is_leaf);
}
_ => panic!("Expected BTree node"),
}
}
assert_eq!(info.median_key, 30);
assert_eq!(info.median_value, "30".to_string());
}
#[test]
fn test_collect_pairs() {
let mut pairs: Vec<(String, i32)> = Vec::new();
let mut node: TrieNode<String, i32> = create_test_trie_node::<String, i32>(Some(1), 0);
let child1: TrieNode<String, i32> = create_test_trie_node(Some(2), 1);
let child2: TrieNode<String, i32> = create_test_trie_node(Some(3), 1);
node.children[97] = Some(Arc::new(RwLock::new(NodeType::Trie(child1)))); node.children[98] = Some(Arc::new(RwLock::new(NodeType::Trie(child2))));
utils::collect_pairs(&node, Vec::new(), &mut pairs);
assert_eq!(pairs.len(), 3);
}
#[test]
fn test_convert_to_trie() {
let btree: BTreeNode<String, i32> =
create_test_btree_node(vec!["a".to_string(), "b".to_string()], vec![1, 2], true);
let result: NodeType<String, i32> = utils::convert_to_trie(&btree);
match result {
NodeType::Trie(trie) => {
let key_bytes: &[u8] = "a".as_bytes();
fn verify_trie_path(
children: &[Option<Arc<RwLock<NodeType<String, i32>>>>],
bytes: &[u8],
expected_value: i32,
) -> bool {
if bytes.is_empty() {
return false;
}
let byte: usize = bytes[0] as usize;
if let Some(ref child) = children[byte] {
if let Ok(guard) = child.read() {
match &*guard {
NodeType::Trie(trie_node) => {
if bytes.len() == 1 {
trie_node.value == Some(expected_value)
} else {
verify_trie_path(
&trie_node.children,
&bytes[1..],
expected_value,
)
}
}
_ => false,
}
} else {
false
}
} else {
false
}
}
assert!(verify_trie_path(&trie.children, key_bytes, 1));
}
_ => panic!("Expected Trie node"),
}
}
#[test]
fn test_convert_to_btree() {
let mut trie: TrieNode<String, i32> = create_test_trie_node(Some(1), 0);
let child: TrieNode<String, i32> = create_test_trie_node(Some(2), 1);
trie.children[97] = Some(Arc::new(RwLock::new(NodeType::Trie(child))));
let result: NodeType<String, i32> = utils::convert_to_btree(&trie);
match result {
NodeType::BTree(btree) => {
assert!(btree.is_leaf);
assert_eq!(btree.keys.len(), 2);
assert_eq!(btree.values.len(), 2);
}
_ => panic!("Expected BTree node"),
}
}
#[test]
fn test_btree_node_operations() {
let btree: BTreeNode<String, i32> =
create_test_btree_node(vec!["a".to_string(), "b".to_string()], vec![1, 2], true);
assert_eq!(btree.keys.len(), 2);
assert_eq!(btree.values.len(), 2);
assert!(btree.is_leaf);
}
#[test]
fn test_string_key_conversion() {
let key: String = String::from("hello");
assert_eq!(key.to_bytes(), b"hello");
}
#[test]
fn test_integer_key_conversion() {
let key: u32 = 42;
assert_eq!(key.to_bytes(), vec![0, 0, 0, 42]);
let key: i16 = -1;
assert_eq!(key.to_bytes(), vec![255, 255]);
}
#[test]
fn test_bytes_key_conversion() {
let key: Vec<u8> = vec![1, 2, 3, 4];
assert_eq!(key.to_bytes(), vec![1, 2, 3, 4]);
}
}