ruvector_mincut/instance/witness.rs
1//! Witness types for cut certification
2//!
3//! A witness represents a connected set U ⊆ V with its boundary δ(U).
4//! The witness certifies that a proper cut exists with value |δ(U)|.
5//!
6//! # Representation
7//!
8//! Witnesses use an implicit representation for memory efficiency:
9//! - **Seed vertex**: The starting vertex that defines the connected component
10//! - **Membership bitmap**: Compressed bitmap indicating which vertices are in U
11//! - **Boundary size**: Pre-computed value |δ(U)| for O(1) queries
12//! - **Hash**: Fast equality checking without full comparison
13//!
14//! # Performance
15//!
16//! - `WitnessHandle` uses `Arc` for cheap cloning (O(1))
17//! - `contains()` is O(1) via bitmap lookup
18//! - `boundary_size()` is O(1) via cached value
19//! - `materialize_partition()` is O(|V|) and should be used sparingly
20
21use crate::graph::VertexId;
22use roaring::RoaringBitmap;
23use std::collections::HashSet;
24use std::sync::Arc;
25use std::hash::{Hash, Hasher};
26use std::collections::hash_map::DefaultHasher;
27
28/// Handle to a witness (cheap to clone)
29///
30/// This is the primary type for passing witnesses around. It uses an `Arc`
31/// internally so cloning is O(1) and witnesses can be shared across threads.
32///
33/// # Examples
34///
35/// ```
36/// use ruvector_mincut::instance::witness::WitnessHandle;
37/// use roaring::RoaringBitmap;
38///
39/// let mut membership = RoaringBitmap::new();
40/// membership.insert(1);
41/// membership.insert(2);
42/// membership.insert(3);
43///
44/// let witness = WitnessHandle::new(1, membership, 4);
45/// assert!(witness.contains(1));
46/// assert!(witness.contains(2));
47/// assert!(!witness.contains(5));
48/// assert_eq!(witness.boundary_size(), 4);
49/// ```
50#[derive(Debug, Clone)]
51pub struct WitnessHandle {
52 inner: Arc<ImplicitWitness>,
53}
54
55/// Implicit representation of a cut witness
56///
57/// The witness represents a connected set U ⊆ V where:
58/// - U contains the seed vertex
59/// - |δ(U)| = boundary_size
60/// - membership\[v\] = true iff v ∈ U
61#[derive(Debug)]
62pub struct ImplicitWitness {
63 /// Seed vertex that defines the cut (always in U)
64 pub seed: VertexId,
65 /// Membership bitmap (vertex v is in U iff bit v is set)
66 pub membership: RoaringBitmap,
67 /// Current boundary size |δ(U)|
68 pub boundary_size: u64,
69 /// Hash for quick equality checks
70 pub hash: u64,
71}
72
73impl WitnessHandle {
74 /// Create a new witness handle
75 ///
76 /// # Arguments
77 ///
78 /// * `seed` - The seed vertex defining this cut (must be in membership)
79 /// * `membership` - Bitmap of vertices in the cut set U
80 /// * `boundary_size` - The size of the boundary |δ(U)|
81 ///
82 /// # Panics
83 ///
84 /// Panics if the seed vertex is not in the membership set (debug builds only)
85 ///
86 /// # Examples
87 ///
88 /// ```
89 /// use ruvector_mincut::instance::witness::WitnessHandle;
90 /// use roaring::RoaringBitmap;
91 ///
92 /// let mut membership = RoaringBitmap::new();
93 /// membership.insert(0);
94 /// membership.insert(1);
95 ///
96 /// let witness = WitnessHandle::new(0, membership, 5);
97 /// assert_eq!(witness.seed(), 0);
98 /// ```
99 pub fn new(seed: VertexId, membership: RoaringBitmap, boundary_size: u64) -> Self {
100 debug_assert!(
101 seed <= u32::MAX as u64,
102 "Seed vertex {} exceeds u32::MAX",
103 seed
104 );
105 debug_assert!(
106 membership.contains(seed as u32),
107 "Seed vertex {} must be in membership set",
108 seed
109 );
110
111 let hash = Self::compute_hash(seed, &membership);
112
113 Self {
114 inner: Arc::new(ImplicitWitness {
115 seed,
116 membership,
117 boundary_size,
118 hash,
119 }),
120 }
121 }
122
123 /// Compute hash for a witness
124 ///
125 /// The hash combines the seed vertex and membership bitmap for fast equality checks.
126 fn compute_hash(seed: VertexId, membership: &RoaringBitmap) -> u64 {
127 let mut hasher = DefaultHasher::new();
128 seed.hash(&mut hasher);
129
130 // Hash the membership bitmap by iterating its values
131 for vertex in membership.iter() {
132 vertex.hash(&mut hasher);
133 }
134
135 hasher.finish()
136 }
137
138 /// Check if vertex is in the cut set U
139 ///
140 /// # Time Complexity
141 ///
142 /// O(1) via bitmap lookup
143 ///
144 /// # Examples
145 ///
146 /// ```
147 /// use ruvector_mincut::instance::witness::WitnessHandle;
148 /// use roaring::RoaringBitmap;
149 ///
150 /// let mut membership = RoaringBitmap::new();
151 /// membership.insert(5);
152 /// membership.insert(10);
153 ///
154 /// let witness = WitnessHandle::new(5, membership, 3);
155 /// assert!(witness.contains(5));
156 /// assert!(witness.contains(10));
157 /// assert!(!witness.contains(15));
158 /// ```
159 #[inline]
160 pub fn contains(&self, v: VertexId) -> bool {
161 if v > u32::MAX as u64 {
162 return false;
163 }
164 self.inner.membership.contains(v as u32)
165 }
166
167 /// Get boundary size |δ(U)|
168 ///
169 /// Returns the pre-computed boundary size for O(1) access.
170 ///
171 /// # Examples
172 ///
173 /// ```
174 /// use ruvector_mincut::instance::witness::WitnessHandle;
175 /// use roaring::RoaringBitmap;
176 ///
177 /// let witness = WitnessHandle::new(1, RoaringBitmap::from_iter([1, 2, 3]), 7);
178 /// assert_eq!(witness.boundary_size(), 7);
179 /// ```
180 #[inline]
181 pub fn boundary_size(&self) -> u64 {
182 self.inner.boundary_size
183 }
184
185 /// Get the seed vertex
186 ///
187 /// # Examples
188 ///
189 /// ```
190 /// use ruvector_mincut::instance::witness::WitnessHandle;
191 /// use roaring::RoaringBitmap;
192 ///
193 /// let witness = WitnessHandle::new(42, RoaringBitmap::from_iter([42u32]), 1);
194 /// assert_eq!(witness.seed(), 42);
195 /// ```
196 #[inline]
197 pub fn seed(&self) -> VertexId {
198 self.inner.seed
199 }
200
201 /// Get the witness hash
202 ///
203 /// Used for fast equality checks without comparing full membership sets.
204 #[inline]
205 pub fn hash(&self) -> u64 {
206 self.inner.hash
207 }
208
209 /// Materialize full partition (U, V \ U)
210 ///
211 /// This is an expensive operation (O(|V|)) that converts the implicit
212 /// representation into explicit sets. Use sparingly, primarily for
213 /// debugging or verification.
214 ///
215 /// # Returns
216 ///
217 /// A tuple `(U, V_minus_U)` where:
218 /// - `U` is the set of vertices in the cut
219 /// - `V_minus_U` is the complement set
220 ///
221 /// # Note
222 ///
223 /// This method assumes vertices are numbered 0..max_vertex. For sparse
224 /// graphs, V \ U may contain vertex IDs that don't exist in the graph.
225 ///
226 /// # Examples
227 ///
228 /// ```
229 /// use ruvector_mincut::instance::witness::WitnessHandle;
230 /// use roaring::RoaringBitmap;
231 /// use std::collections::HashSet;
232 ///
233 /// let mut membership = RoaringBitmap::new();
234 /// membership.insert(1);
235 /// membership.insert(2);
236 ///
237 /// let witness = WitnessHandle::new(1, membership, 3);
238 /// let (u, _v_minus_u) = witness.materialize_partition();
239 ///
240 /// assert!(u.contains(&1));
241 /// assert!(u.contains(&2));
242 /// assert!(!u.contains(&3));
243 /// ```
244 pub fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>) {
245 let u: HashSet<VertexId> = self.inner.membership.iter().map(|v| v as u64).collect();
246
247 // Find the maximum vertex ID to determine graph size
248 let max_vertex = self.inner.membership.max().unwrap_or(0) as u64;
249
250 // Create complement set
251 let v_minus_u: HashSet<VertexId> = (0..=max_vertex)
252 .filter(|&v| !self.inner.membership.contains(v as u32))
253 .collect();
254
255 (u, v_minus_u)
256 }
257
258 /// Get the cardinality of the cut set U
259 ///
260 /// # Examples
261 ///
262 /// ```
263 /// use ruvector_mincut::instance::witness::WitnessHandle;
264 /// use roaring::RoaringBitmap;
265 ///
266 /// let witness = WitnessHandle::new(1, RoaringBitmap::from_iter([1u32, 2u32, 3u32]), 5);
267 /// assert_eq!(witness.cardinality(), 3);
268 /// ```
269 #[inline]
270 pub fn cardinality(&self) -> u64 {
271 self.inner.membership.len()
272 }
273}
274
275impl PartialEq for WitnessHandle {
276 /// Fast equality check using hash
277 ///
278 /// First compares hashes (O(1)), then falls back to full comparison if needed.
279 fn eq(&self, other: &Self) -> bool {
280 // Fast path: compare hashes
281 if self.inner.hash != other.inner.hash {
282 return false;
283 }
284
285 // Slow path: compare actual membership
286 self.inner.seed == other.inner.seed
287 && self.inner.boundary_size == other.inner.boundary_size
288 && self.inner.membership == other.inner.membership
289 }
290}
291
292impl Eq for WitnessHandle {}
293
294/// Trait for witness operations
295///
296/// This trait abstracts witness operations for generic programming.
297/// The primary implementation is `WitnessHandle`.
298pub trait Witness {
299 /// Check if vertex is in the cut set U
300 fn contains(&self, v: VertexId) -> bool;
301
302 /// Get boundary size |δ(U)|
303 fn boundary_size(&self) -> u64;
304
305 /// Materialize full partition (expensive)
306 fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>);
307
308 /// Get the seed vertex
309 fn seed(&self) -> VertexId;
310
311 /// Get cardinality of U
312 fn cardinality(&self) -> u64;
313}
314
315impl Witness for WitnessHandle {
316 #[inline]
317 fn contains(&self, v: VertexId) -> bool {
318 WitnessHandle::contains(self, v)
319 }
320
321 #[inline]
322 fn boundary_size(&self) -> u64 {
323 WitnessHandle::boundary_size(self)
324 }
325
326 fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>) {
327 WitnessHandle::materialize_partition(self)
328 }
329
330 #[inline]
331 fn seed(&self) -> VertexId {
332 WitnessHandle::seed(self)
333 }
334
335 #[inline]
336 fn cardinality(&self) -> u64 {
337 WitnessHandle::cardinality(self)
338 }
339}
340
341/// Recipe for constructing a witness lazily
342///
343/// Instead of computing the full membership bitmap upfront, this struct
344/// stores the parameters needed to construct it on demand. This is useful
345/// when you need to store many potential witnesses but only access a few.
346///
347/// # Lazy Evaluation
348///
349/// The membership bitmap is only computed when:
350/// - `materialize()` is called to get a full `WitnessHandle`
351/// - `contains()` is called to check vertex membership
352/// - Any other operation that requires the full witness
353///
354/// # Memory Savings
355///
356/// A `LazyWitness` uses only ~32 bytes vs potentially kilobytes for a
357/// `WitnessHandle` with a large membership bitmap.
358///
359/// # Example
360///
361/// ```
362/// use ruvector_mincut::instance::witness::LazyWitness;
363///
364/// // Create a lazy witness recipe
365/// let lazy = LazyWitness::new(42, 10, 5);
366///
367/// // No computation happens until materialized
368/// assert_eq!(lazy.seed(), 42);
369/// assert_eq!(lazy.boundary_size(), 5);
370///
371/// // Calling with_adjacency materializes the witness
372/// // (requires adjacency data from the graph)
373/// ```
374#[derive(Debug, Clone)]
375pub struct LazyWitness {
376 /// Seed vertex that defines the cut
377 seed: VertexId,
378 /// Radius of the local search that found this witness
379 radius: usize,
380 /// Pre-computed boundary size
381 boundary_size: u64,
382 /// Cached materialized witness (computed on first access)
383 cached: std::sync::OnceLock<WitnessHandle>,
384}
385
386impl LazyWitness {
387 /// Create a new lazy witness
388 ///
389 /// # Arguments
390 ///
391 /// * `seed` - Seed vertex defining the cut
392 /// * `radius` - Radius of local search used
393 /// * `boundary_size` - Pre-computed boundary size
394 pub fn new(seed: VertexId, radius: usize, boundary_size: u64) -> Self {
395 Self {
396 seed,
397 radius,
398 boundary_size,
399 cached: std::sync::OnceLock::new(),
400 }
401 }
402
403 /// Get the seed vertex
404 #[inline]
405 pub fn seed(&self) -> VertexId {
406 self.seed
407 }
408
409 /// Get the search radius
410 #[inline]
411 pub fn radius(&self) -> usize {
412 self.radius
413 }
414
415 /// Get boundary size |δ(U)|
416 #[inline]
417 pub fn boundary_size(&self) -> u64 {
418 self.boundary_size
419 }
420
421 /// Check if the witness has been materialized
422 #[inline]
423 pub fn is_materialized(&self) -> bool {
424 self.cached.get().is_some()
425 }
426
427 /// Materialize the witness with adjacency information
428 ///
429 /// This performs a BFS from the seed vertex up to the given radius
430 /// to construct the full membership bitmap.
431 ///
432 /// # Arguments
433 ///
434 /// * `adjacency` - Function to get neighbors of a vertex
435 ///
436 /// # Returns
437 ///
438 /// A fully materialized `WitnessHandle`
439 pub fn materialize<F>(&self, adjacency: F) -> WitnessHandle
440 where
441 F: Fn(VertexId) -> Vec<VertexId>,
442 {
443 self.cached.get_or_init(|| {
444 // BFS from seed up to radius
445 let mut membership = RoaringBitmap::new();
446 let mut visited = HashSet::new();
447 let mut queue = std::collections::VecDeque::new();
448
449 queue.push_back((self.seed, 0usize));
450 visited.insert(self.seed);
451 membership.insert(self.seed as u32);
452
453 while let Some((vertex, dist)) = queue.pop_front() {
454 if dist >= self.radius {
455 continue;
456 }
457
458 for neighbor in adjacency(vertex) {
459 if visited.insert(neighbor) {
460 membership.insert(neighbor as u32);
461 queue.push_back((neighbor, dist + 1));
462 }
463 }
464 }
465
466 WitnessHandle::new(self.seed, membership, self.boundary_size)
467 }).clone()
468 }
469
470 /// Set a pre-computed witness (for cases where we already have it)
471 pub fn set_materialized(&self, witness: WitnessHandle) {
472 let _ = self.cached.set(witness);
473 }
474
475 /// Get the cached witness if already materialized
476 pub fn get_cached(&self) -> Option<&WitnessHandle> {
477 self.cached.get()
478 }
479}
480
481/// Batch of lazy witnesses for efficient storage
482///
483/// Stores multiple lazy witnesses compactly and tracks which
484/// have been materialized.
485#[derive(Debug, Default)]
486pub struct LazyWitnessBatch {
487 /// Lazy witnesses in this batch
488 witnesses: Vec<LazyWitness>,
489 /// Count of materialized witnesses
490 materialized_count: std::sync::atomic::AtomicUsize,
491}
492
493impl LazyWitnessBatch {
494 /// Create a new empty batch
495 pub fn new() -> Self {
496 Self {
497 witnesses: Vec::new(),
498 materialized_count: std::sync::atomic::AtomicUsize::new(0),
499 }
500 }
501
502 /// Create batch with capacity
503 pub fn with_capacity(capacity: usize) -> Self {
504 Self {
505 witnesses: Vec::with_capacity(capacity),
506 materialized_count: std::sync::atomic::AtomicUsize::new(0),
507 }
508 }
509
510 /// Add a lazy witness to the batch
511 pub fn push(&mut self, witness: LazyWitness) {
512 self.witnesses.push(witness);
513 }
514
515 /// Get witness by index
516 pub fn get(&self, index: usize) -> Option<&LazyWitness> {
517 self.witnesses.get(index)
518 }
519
520 /// Number of witnesses in batch
521 pub fn len(&self) -> usize {
522 self.witnesses.len()
523 }
524
525 /// Check if batch is empty
526 pub fn is_empty(&self) -> bool {
527 self.witnesses.is_empty()
528 }
529
530 /// Count of materialized witnesses
531 pub fn materialized_count(&self) -> usize {
532 self.materialized_count.load(std::sync::atomic::Ordering::Relaxed)
533 }
534
535 /// Materialize a specific witness
536 pub fn materialize<F>(&self, index: usize, adjacency: F) -> Option<WitnessHandle>
537 where
538 F: Fn(VertexId) -> Vec<VertexId>,
539 {
540 self.witnesses.get(index).map(|lazy| {
541 let was_materialized = lazy.is_materialized();
542 let handle = lazy.materialize(adjacency);
543 if !was_materialized {
544 self.materialized_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
545 }
546 handle
547 })
548 }
549
550 /// Find witness with smallest boundary (materializes only as needed)
551 pub fn find_smallest_boundary(&self) -> Option<&LazyWitness> {
552 self.witnesses.iter().min_by_key(|w| w.boundary_size())
553 }
554
555 /// Iterate over all lazy witnesses
556 pub fn iter(&self) -> impl Iterator<Item = &LazyWitness> {
557 self.witnesses.iter()
558 }
559}
560
561#[cfg(test)]
562mod lazy_tests {
563 use super::*;
564
565 #[test]
566 fn test_lazy_witness_new() {
567 let lazy = LazyWitness::new(42, 5, 10);
568 assert_eq!(lazy.seed(), 42);
569 assert_eq!(lazy.radius(), 5);
570 assert_eq!(lazy.boundary_size(), 10);
571 assert!(!lazy.is_materialized());
572 }
573
574 #[test]
575 fn test_lazy_witness_materialize() {
576 let lazy = LazyWitness::new(0, 2, 3);
577
578 // Simple adjacency: linear graph 0-1-2-3-4
579 let adjacency = |v: VertexId| -> Vec<VertexId> {
580 match v {
581 0 => vec![1],
582 1 => vec![0, 2],
583 2 => vec![1, 3],
584 3 => vec![2, 4],
585 4 => vec![3],
586 _ => vec![],
587 }
588 };
589
590 let handle = lazy.materialize(adjacency);
591
592 // With radius 2 from vertex 0, should include 0, 1, 2
593 assert!(handle.contains(0));
594 assert!(handle.contains(1));
595 assert!(handle.contains(2));
596 assert!(!handle.contains(3)); // Beyond radius 2
597 assert!(lazy.is_materialized());
598 }
599
600 #[test]
601 fn test_lazy_witness_caching() {
602 let lazy = LazyWitness::new(0, 1, 5);
603
604 let call_count = std::sync::atomic::AtomicUsize::new(0);
605 let adjacency = |v: VertexId| -> Vec<VertexId> {
606 call_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
607 if v == 0 { vec![1, 2] } else { vec![] }
608 };
609
610 // First materialization
611 let _h1 = lazy.materialize(&adjacency);
612 let first_count = call_count.load(std::sync::atomic::Ordering::Relaxed);
613
614 // Second materialization should use cache
615 let _h2 = lazy.materialize(&adjacency);
616 let second_count = call_count.load(std::sync::atomic::Ordering::Relaxed);
617
618 // Adjacency should only be called during first materialization
619 assert_eq!(first_count, second_count);
620 }
621
622 #[test]
623 fn test_lazy_witness_batch() {
624 let mut batch = LazyWitnessBatch::with_capacity(3);
625
626 batch.push(LazyWitness::new(0, 2, 5));
627 batch.push(LazyWitness::new(1, 3, 3)); // Smallest boundary
628 batch.push(LazyWitness::new(2, 1, 7));
629
630 assert_eq!(batch.len(), 3);
631 assert_eq!(batch.materialized_count(), 0);
632
633 // Find smallest boundary
634 let smallest = batch.find_smallest_boundary().unwrap();
635 assert_eq!(smallest.seed(), 1);
636 assert_eq!(smallest.boundary_size(), 3);
637 }
638
639 #[test]
640 fn test_batch_materialize() {
641 let mut batch = LazyWitnessBatch::new();
642 batch.push(LazyWitness::new(0, 1, 5));
643
644 let adjacency = |_v: VertexId| -> Vec<VertexId> { vec![1, 2] };
645
646 let handle = batch.materialize(0, adjacency).unwrap();
647 assert!(handle.contains(0));
648 assert!(handle.contains(1));
649 assert!(handle.contains(2));
650
651 assert_eq!(batch.materialized_count(), 1);
652 }
653}