#![allow(clippy::cast_precision_loss)]
use super::degree_router::EdgeIndex;
use rustc_hash::FxHashMap;
const FRAGMENTATION_THRESHOLD: f64 = 0.30;
#[derive(Debug, Clone)]
pub struct ClusteredIndex {
data: Vec<u64>,
index: FxHashMap<u64, (usize, usize)>,
free_slots: Vec<(usize, usize)>,
free_bytes: usize,
}
impl ClusteredIndex {
#[must_use]
pub fn new() -> Self {
Self {
data: Vec::new(),
index: FxHashMap::default(),
free_slots: Vec::new(),
free_bytes: 0,
}
}
#[must_use]
pub fn with_capacity(node_capacity: usize, data_capacity: usize) -> Self {
Self {
data: Vec::with_capacity(data_capacity),
index: FxHashMap::with_capacity_and_hasher(node_capacity, rustc_hash::FxBuildHasher),
free_slots: Vec::new(),
free_bytes: 0,
}
}
#[must_use]
pub fn get_neighbors(&self, node_id: u64) -> &[u64] {
if let Some(&(offset, length)) = self.index.get(&node_id) {
&self.data[offset..offset + length]
} else {
&[]
}
}
pub fn insert(&mut self, node_id: u64, target: u64) {
if let Some(&(offset, length)) = self.index.get(&node_id) {
let slice = &self.data[offset..offset + length];
if slice.contains(&target) {
return;
}
let new_length = length + 1;
let can_extend = offset + length == self.data.len()
|| self.try_merge_adjacent_free(offset + length, 1);
if can_extend && offset + length == self.data.len() {
self.data.push(target);
self.index.insert(node_id, (offset, new_length));
} else if can_extend {
self.data[offset + length] = target;
self.index.insert(node_id, (offset, new_length));
} else {
let old_data: Vec<u64> = self.data[offset..offset + length].to_vec();
self.mark_free(offset, length);
let new_offset = self.allocate_slot(new_length);
for (i, &val) in old_data.iter().enumerate() {
self.data[new_offset + i] = val;
}
self.data[new_offset + length] = target;
self.index.insert(node_id, (new_offset, new_length));
}
} else {
let offset = self.allocate_slot(1);
self.data[offset] = target;
self.index.insert(node_id, (offset, 1));
}
self.maybe_compact();
}
pub fn remove(&mut self, node_id: u64, target: u64) -> bool {
if let Some(&(offset, length)) = self.index.get(&node_id) {
let slice = &self.data[offset..offset + length];
if let Some(pos) = slice.iter().position(|&t| t == target) {
if length == 1 {
self.mark_free(offset, length);
self.index.remove(&node_id);
} else {
self.data[offset + pos] = self.data[offset + length - 1];
self.index.insert(node_id, (offset, length - 1));
self.free_slots.push((offset + length - 1, 1));
self.free_bytes += 1;
}
return true;
}
}
false
}
pub fn remove_node(&mut self, node_id: u64) {
if let Some((offset, length)) = self.index.remove(&node_id) {
self.mark_free(offset, length);
}
}
#[must_use]
pub fn node_count(&self) -> usize {
self.index.len()
}
#[must_use]
pub fn edge_count(&self) -> usize {
self.index.values().map(|(_, len)| len).sum()
}
#[must_use]
pub fn fragmentation(&self) -> f64 {
if self.data.is_empty() {
0.0
} else {
self.free_bytes as f64 / self.data.len() as f64
}
}
pub fn compact(&mut self) {
if self.free_bytes == 0 {
return;
}
let entries: Vec<(u64, Vec<u64>)> = self
.index
.iter()
.map(|(&node_id, &(offset, length))| {
(node_id, self.data[offset..offset + length].to_vec())
})
.collect();
self.data.clear();
self.index.clear();
self.free_slots.clear();
self.free_bytes = 0;
for (node_id, neighbors) in entries {
let offset = self.data.len();
let length = neighbors.len();
self.data.extend(neighbors);
self.index.insert(node_id, (offset, length));
}
}
#[must_use]
pub fn contains(&self, node_id: u64, target: u64) -> bool {
self.get_neighbors(node_id).contains(&target)
}
#[must_use]
pub fn neighbor_count(&self, node_id: u64) -> usize {
self.index.get(&node_id).map_or(0, |(_, len)| *len)
}
fn allocate_slot(&mut self, needed: usize) -> usize {
for i in 0..self.free_slots.len() {
let (offset, length) = self.free_slots[i];
if length >= needed {
if length > needed {
self.free_slots[i] = (offset + needed, length - needed);
} else {
self.free_slots.swap_remove(i);
}
self.free_bytes = self.free_bytes.saturating_sub(needed);
return offset;
}
}
let offset = self.data.len();
self.data.resize(self.data.len() + needed, 0);
offset
}
fn mark_free(&mut self, offset: usize, length: usize) {
self.free_slots.push((offset, length));
self.free_bytes += length;
self.merge_adjacent_free_slots();
}
fn try_merge_adjacent_free(&mut self, offset: usize, needed: usize) -> bool {
for i in 0..self.free_slots.len() {
let (free_offset, free_length) = self.free_slots[i];
if free_offset == offset && free_length >= needed {
if free_length > needed {
self.free_slots[i] = (free_offset + needed, free_length - needed);
} else {
self.free_slots.swap_remove(i);
}
self.free_bytes = self.free_bytes.saturating_sub(needed);
return true;
}
}
false
}
fn merge_adjacent_free_slots(&mut self) {
if self.free_slots.len() < 2 {
return;
}
self.free_slots.sort_by_key(|(offset, _)| *offset);
let mut i = 0;
while i < self.free_slots.len() - 1 {
let (offset1, len1) = self.free_slots[i];
let (offset2, len2) = self.free_slots[i + 1];
if offset1 + len1 == offset2 {
self.free_slots[i] = (offset1, len1 + len2);
self.free_slots.remove(i + 1);
} else {
i += 1;
}
}
}
fn maybe_compact(&mut self) {
if self.fragmentation() > FRAGMENTATION_THRESHOLD {
self.compact();
}
}
}
impl Default for ClusteredIndex {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug)]
pub struct ClusteredEdgeIndex<'a> {
index: &'a mut ClusteredIndex,
node_id: u64,
}
impl<'a> ClusteredEdgeIndex<'a> {
pub fn new(index: &'a mut ClusteredIndex, node_id: u64) -> Self {
Self { index, node_id }
}
}
impl EdgeIndex for ClusteredEdgeIndex<'_> {
fn insert(&mut self, target: u64) {
self.index.insert(self.node_id, target);
}
fn remove(&mut self, target: u64) -> bool {
self.index.remove(self.node_id, target)
}
fn contains(&self, target: u64) -> bool {
self.index.contains(self.node_id, target)
}
fn targets(&self) -> Vec<u64> {
self.index.get_neighbors(self.node_id).to_vec()
}
fn len(&self) -> usize {
self.index.neighbor_count(self.node_id)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_clustered_index_basic() {
let mut index = ClusteredIndex::new();
index.insert(1, 10);
index.insert(1, 20);
index.insert(1, 30);
assert_eq!(index.get_neighbors(1).len(), 3);
assert!(index.contains(1, 10));
assert!(index.contains(1, 20));
assert!(index.contains(1, 30));
}
#[test]
fn test_clustered_index_multiple_nodes() {
let mut index = ClusteredIndex::new();
index.insert(1, 10);
index.insert(1, 20);
index.insert(2, 100);
index.insert(2, 200);
index.insert(3, 1000);
assert_eq!(index.node_count(), 3);
assert_eq!(index.edge_count(), 5);
assert_eq!(index.neighbor_count(1), 2);
assert_eq!(index.neighbor_count(2), 2);
assert_eq!(index.neighbor_count(3), 1);
}
#[test]
fn test_clustered_index_no_duplicates() {
let mut index = ClusteredIndex::new();
index.insert(1, 10);
index.insert(1, 10);
index.insert(1, 10);
assert_eq!(index.neighbor_count(1), 1);
}
#[test]
fn test_clustered_index_remove() {
let mut index = ClusteredIndex::new();
index.insert(1, 10);
index.insert(1, 20);
index.insert(1, 30);
assert!(index.remove(1, 20));
assert!(!index.contains(1, 20));
assert_eq!(index.neighbor_count(1), 2);
assert!(!index.remove(1, 99)); }
#[test]
fn test_clustered_index_remove_node() {
let mut index = ClusteredIndex::new();
index.insert(1, 10);
index.insert(1, 20);
index.insert(2, 100);
index.remove_node(1);
assert_eq!(index.node_count(), 1);
assert_eq!(index.neighbor_count(1), 0);
assert_eq!(index.neighbor_count(2), 1);
}
#[test]
fn test_clustered_index_compaction() {
let mut index = ClusteredIndex::new();
for i in 0..10 {
for j in 0..5 {
index.insert(i, j * 100);
}
}
for i in 0..5 {
index.remove_node(i);
}
let frag_before = index.fragmentation();
assert!(frag_before > 0.0);
index.compact();
assert!(index.fragmentation().abs() < f64::EPSILON);
assert_eq!(index.node_count(), 5);
}
#[test]
fn test_clustered_index_slot_reuse() {
let mut index = ClusteredIndex::new();
index.insert(1, 10);
index.insert(1, 20);
index.remove_node(1);
index.insert(2, 100);
assert_eq!(index.node_count(), 1);
assert!(index.contains(2, 100));
}
}