use crate::graph::VertexId;
use roaring::RoaringBitmap;
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct WitnessHandle {
inner: Arc<ImplicitWitness>,
}
#[derive(Debug)]
pub struct ImplicitWitness {
pub seed: VertexId,
pub membership: RoaringBitmap,
pub boundary_size: u64,
pub hash: u64,
}
impl WitnessHandle {
pub fn new(seed: VertexId, membership: RoaringBitmap, boundary_size: u64) -> Self {
debug_assert!(
seed <= u32::MAX as u64,
"Seed vertex {} exceeds u32::MAX",
seed
);
debug_assert!(
membership.contains(seed as u32),
"Seed vertex {} must be in membership set",
seed
);
let hash = Self::compute_hash(seed, &membership);
Self {
inner: Arc::new(ImplicitWitness {
seed,
membership,
boundary_size,
hash,
}),
}
}
fn compute_hash(seed: VertexId, membership: &RoaringBitmap) -> u64 {
let mut hasher = DefaultHasher::new();
seed.hash(&mut hasher);
for vertex in membership.iter() {
vertex.hash(&mut hasher);
}
hasher.finish()
}
#[inline]
pub fn contains(&self, v: VertexId) -> bool {
if v > u32::MAX as u64 {
return false;
}
self.inner.membership.contains(v as u32)
}
#[inline]
pub fn boundary_size(&self) -> u64 {
self.inner.boundary_size
}
#[inline]
pub fn seed(&self) -> VertexId {
self.inner.seed
}
#[inline]
pub fn hash(&self) -> u64 {
self.inner.hash
}
pub fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>) {
let u: HashSet<VertexId> = self.inner.membership.iter().map(|v| v as u64).collect();
let max_vertex = self.inner.membership.max().unwrap_or(0) as u64;
let v_minus_u: HashSet<VertexId> = (0..=max_vertex)
.filter(|&v| !self.inner.membership.contains(v as u32))
.collect();
(u, v_minus_u)
}
#[inline]
pub fn cardinality(&self) -> u64 {
self.inner.membership.len()
}
}
impl PartialEq for WitnessHandle {
fn eq(&self, other: &Self) -> bool {
if self.inner.hash != other.inner.hash {
return false;
}
self.inner.seed == other.inner.seed
&& self.inner.boundary_size == other.inner.boundary_size
&& self.inner.membership == other.inner.membership
}
}
impl Eq for WitnessHandle {}
pub trait Witness {
fn contains(&self, v: VertexId) -> bool;
fn boundary_size(&self) -> u64;
fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>);
fn seed(&self) -> VertexId;
fn cardinality(&self) -> u64;
}
impl Witness for WitnessHandle {
#[inline]
fn contains(&self, v: VertexId) -> bool {
WitnessHandle::contains(self, v)
}
#[inline]
fn boundary_size(&self) -> u64 {
WitnessHandle::boundary_size(self)
}
fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>) {
WitnessHandle::materialize_partition(self)
}
#[inline]
fn seed(&self) -> VertexId {
WitnessHandle::seed(self)
}
#[inline]
fn cardinality(&self) -> u64 {
WitnessHandle::cardinality(self)
}
}
#[derive(Debug, Clone)]
pub struct LazyWitness {
seed: VertexId,
radius: usize,
boundary_size: u64,
cached: std::sync::OnceLock<WitnessHandle>,
}
impl LazyWitness {
pub fn new(seed: VertexId, radius: usize, boundary_size: u64) -> Self {
Self {
seed,
radius,
boundary_size,
cached: std::sync::OnceLock::new(),
}
}
#[inline]
pub fn seed(&self) -> VertexId {
self.seed
}
#[inline]
pub fn radius(&self) -> usize {
self.radius
}
#[inline]
pub fn boundary_size(&self) -> u64 {
self.boundary_size
}
#[inline]
pub fn is_materialized(&self) -> bool {
self.cached.get().is_some()
}
pub fn materialize<F>(&self, adjacency: F) -> WitnessHandle
where
F: Fn(VertexId) -> Vec<VertexId>,
{
self.cached
.get_or_init(|| {
let mut membership = RoaringBitmap::new();
let mut visited = HashSet::new();
let mut queue = std::collections::VecDeque::new();
queue.push_back((self.seed, 0usize));
visited.insert(self.seed);
membership.insert(self.seed as u32);
while let Some((vertex, dist)) = queue.pop_front() {
if dist >= self.radius {
continue;
}
for neighbor in adjacency(vertex) {
if visited.insert(neighbor) {
membership.insert(neighbor as u32);
queue.push_back((neighbor, dist + 1));
}
}
}
WitnessHandle::new(self.seed, membership, self.boundary_size)
})
.clone()
}
pub fn set_materialized(&self, witness: WitnessHandle) {
let _ = self.cached.set(witness);
}
pub fn get_cached(&self) -> Option<&WitnessHandle> {
self.cached.get()
}
}
#[derive(Debug, Default)]
pub struct LazyWitnessBatch {
witnesses: Vec<LazyWitness>,
materialized_count: std::sync::atomic::AtomicUsize,
}
impl LazyWitnessBatch {
pub fn new() -> Self {
Self {
witnesses: Vec::new(),
materialized_count: std::sync::atomic::AtomicUsize::new(0),
}
}
pub fn with_capacity(capacity: usize) -> Self {
Self {
witnesses: Vec::with_capacity(capacity),
materialized_count: std::sync::atomic::AtomicUsize::new(0),
}
}
pub fn push(&mut self, witness: LazyWitness) {
self.witnesses.push(witness);
}
pub fn get(&self, index: usize) -> Option<&LazyWitness> {
self.witnesses.get(index)
}
pub fn len(&self) -> usize {
self.witnesses.len()
}
pub fn is_empty(&self) -> bool {
self.witnesses.is_empty()
}
pub fn materialized_count(&self) -> usize {
self.materialized_count
.load(std::sync::atomic::Ordering::Relaxed)
}
pub fn materialize<F>(&self, index: usize, adjacency: F) -> Option<WitnessHandle>
where
F: Fn(VertexId) -> Vec<VertexId>,
{
self.witnesses.get(index).map(|lazy| {
let was_materialized = lazy.is_materialized();
let handle = lazy.materialize(adjacency);
if !was_materialized {
self.materialized_count
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
handle
})
}
pub fn find_smallest_boundary(&self) -> Option<&LazyWitness> {
self.witnesses.iter().min_by_key(|w| w.boundary_size())
}
pub fn iter(&self) -> impl Iterator<Item = &LazyWitness> {
self.witnesses.iter()
}
}
#[cfg(test)]
mod lazy_tests {
use super::*;
#[test]
fn test_lazy_witness_new() {
let lazy = LazyWitness::new(42, 5, 10);
assert_eq!(lazy.seed(), 42);
assert_eq!(lazy.radius(), 5);
assert_eq!(lazy.boundary_size(), 10);
assert!(!lazy.is_materialized());
}
#[test]
fn test_lazy_witness_materialize() {
let lazy = LazyWitness::new(0, 2, 3);
let adjacency = |v: VertexId| -> Vec<VertexId> {
match v {
0 => vec![1],
1 => vec![0, 2],
2 => vec![1, 3],
3 => vec![2, 4],
4 => vec![3],
_ => vec![],
}
};
let handle = lazy.materialize(adjacency);
assert!(handle.contains(0));
assert!(handle.contains(1));
assert!(handle.contains(2));
assert!(!handle.contains(3)); assert!(lazy.is_materialized());
}
#[test]
fn test_lazy_witness_caching() {
let lazy = LazyWitness::new(0, 1, 5);
let call_count = std::sync::atomic::AtomicUsize::new(0);
let adjacency = |v: VertexId| -> Vec<VertexId> {
call_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
if v == 0 {
vec![1, 2]
} else {
vec![]
}
};
let _h1 = lazy.materialize(&adjacency);
let first_count = call_count.load(std::sync::atomic::Ordering::Relaxed);
let _h2 = lazy.materialize(&adjacency);
let second_count = call_count.load(std::sync::atomic::Ordering::Relaxed);
assert_eq!(first_count, second_count);
}
#[test]
fn test_lazy_witness_batch() {
let mut batch = LazyWitnessBatch::with_capacity(3);
batch.push(LazyWitness::new(0, 2, 5));
batch.push(LazyWitness::new(1, 3, 3)); batch.push(LazyWitness::new(2, 1, 7));
assert_eq!(batch.len(), 3);
assert_eq!(batch.materialized_count(), 0);
let smallest = batch.find_smallest_boundary().unwrap();
assert_eq!(smallest.seed(), 1);
assert_eq!(smallest.boundary_size(), 3);
}
#[test]
fn test_batch_materialize() {
let mut batch = LazyWitnessBatch::new();
batch.push(LazyWitness::new(0, 1, 5));
let adjacency = |_v: VertexId| -> Vec<VertexId> { vec![1, 2] };
let handle = batch.materialize(0, adjacency).unwrap();
assert!(handle.contains(0));
assert!(handle.contains(1));
assert!(handle.contains(2));
assert_eq!(batch.materialized_count(), 1);
}
}