ruvector_mincut/instance/
witness.rs

1//! Witness types for cut certification
2//!
3//! A witness represents a connected set U ⊆ V with its boundary δ(U).
4//! The witness certifies that a proper cut exists with value |δ(U)|.
5//!
6//! # Representation
7//!
8//! Witnesses use an implicit representation for memory efficiency:
9//! - **Seed vertex**: The starting vertex that defines the connected component
10//! - **Membership bitmap**: Compressed bitmap indicating which vertices are in U
11//! - **Boundary size**: Pre-computed value |δ(U)| for O(1) queries
12//! - **Hash**: Fast equality checking without full comparison
13//!
14//! # Performance
15//!
16//! - `WitnessHandle` uses `Arc` for cheap cloning (O(1))
17//! - `contains()` is O(1) via bitmap lookup
18//! - `boundary_size()` is O(1) via cached value
19//! - `materialize_partition()` is O(|V|) and should be used sparingly
20
21use crate::graph::VertexId;
22use roaring::RoaringBitmap;
23use std::collections::HashSet;
24use std::sync::Arc;
25use std::hash::{Hash, Hasher};
26use std::collections::hash_map::DefaultHasher;
27
28/// Handle to a witness (cheap to clone)
29///
30/// This is the primary type for passing witnesses around. It uses an `Arc`
31/// internally so cloning is O(1) and witnesses can be shared across threads.
32///
33/// # Examples
34///
35/// ```
36/// use ruvector_mincut::instance::witness::WitnessHandle;
37/// use roaring::RoaringBitmap;
38///
39/// let mut membership = RoaringBitmap::new();
40/// membership.insert(1);
41/// membership.insert(2);
42/// membership.insert(3);
43///
44/// let witness = WitnessHandle::new(1, membership, 4);
45/// assert!(witness.contains(1));
46/// assert!(witness.contains(2));
47/// assert!(!witness.contains(5));
48/// assert_eq!(witness.boundary_size(), 4);
49/// ```
50#[derive(Debug, Clone)]
51pub struct WitnessHandle {
52    inner: Arc<ImplicitWitness>,
53}
54
55/// Implicit representation of a cut witness
56///
57/// The witness represents a connected set U ⊆ V where:
58/// - U contains the seed vertex
59/// - |δ(U)| = boundary_size
60/// - membership\[v\] = true iff v ∈ U
61#[derive(Debug)]
62pub struct ImplicitWitness {
63    /// Seed vertex that defines the cut (always in U)
64    pub seed: VertexId,
65    /// Membership bitmap (vertex v is in U iff bit v is set)
66    pub membership: RoaringBitmap,
67    /// Current boundary size |δ(U)|
68    pub boundary_size: u64,
69    /// Hash for quick equality checks
70    pub hash: u64,
71}
72
73impl WitnessHandle {
74    /// Create a new witness handle
75    ///
76    /// # Arguments
77    ///
78    /// * `seed` - The seed vertex defining this cut (must be in membership)
79    /// * `membership` - Bitmap of vertices in the cut set U
80    /// * `boundary_size` - The size of the boundary |δ(U)|
81    ///
82    /// # Panics
83    ///
84    /// Panics if the seed vertex is not in the membership set (debug builds only)
85    ///
86    /// # Examples
87    ///
88    /// ```
89    /// use ruvector_mincut::instance::witness::WitnessHandle;
90    /// use roaring::RoaringBitmap;
91    ///
92    /// let mut membership = RoaringBitmap::new();
93    /// membership.insert(0);
94    /// membership.insert(1);
95    ///
96    /// let witness = WitnessHandle::new(0, membership, 5);
97    /// assert_eq!(witness.seed(), 0);
98    /// ```
99    pub fn new(seed: VertexId, membership: RoaringBitmap, boundary_size: u64) -> Self {
100        debug_assert!(
101            seed <= u32::MAX as u64,
102            "Seed vertex {} exceeds u32::MAX",
103            seed
104        );
105        debug_assert!(
106            membership.contains(seed as u32),
107            "Seed vertex {} must be in membership set",
108            seed
109        );
110
111        let hash = Self::compute_hash(seed, &membership);
112
113        Self {
114            inner: Arc::new(ImplicitWitness {
115                seed,
116                membership,
117                boundary_size,
118                hash,
119            }),
120        }
121    }
122
123    /// Compute hash for a witness
124    ///
125    /// The hash combines the seed vertex and membership bitmap for fast equality checks.
126    fn compute_hash(seed: VertexId, membership: &RoaringBitmap) -> u64 {
127        let mut hasher = DefaultHasher::new();
128        seed.hash(&mut hasher);
129
130        // Hash the membership bitmap by iterating its values
131        for vertex in membership.iter() {
132            vertex.hash(&mut hasher);
133        }
134
135        hasher.finish()
136    }
137
138    /// Check if vertex is in the cut set U
139    ///
140    /// # Time Complexity
141    ///
142    /// O(1) via bitmap lookup
143    ///
144    /// # Examples
145    ///
146    /// ```
147    /// use ruvector_mincut::instance::witness::WitnessHandle;
148    /// use roaring::RoaringBitmap;
149    ///
150    /// let mut membership = RoaringBitmap::new();
151    /// membership.insert(5);
152    /// membership.insert(10);
153    ///
154    /// let witness = WitnessHandle::new(5, membership, 3);
155    /// assert!(witness.contains(5));
156    /// assert!(witness.contains(10));
157    /// assert!(!witness.contains(15));
158    /// ```
159    #[inline]
160    pub fn contains(&self, v: VertexId) -> bool {
161        if v > u32::MAX as u64 {
162            return false;
163        }
164        self.inner.membership.contains(v as u32)
165    }
166
167    /// Get boundary size |δ(U)|
168    ///
169    /// Returns the pre-computed boundary size for O(1) access.
170    ///
171    /// # Examples
172    ///
173    /// ```
174    /// use ruvector_mincut::instance::witness::WitnessHandle;
175    /// use roaring::RoaringBitmap;
176    ///
177    /// let witness = WitnessHandle::new(1, RoaringBitmap::from_iter([1, 2, 3]), 7);
178    /// assert_eq!(witness.boundary_size(), 7);
179    /// ```
180    #[inline]
181    pub fn boundary_size(&self) -> u64 {
182        self.inner.boundary_size
183    }
184
185    /// Get the seed vertex
186    ///
187    /// # Examples
188    ///
189    /// ```
190    /// use ruvector_mincut::instance::witness::WitnessHandle;
191    /// use roaring::RoaringBitmap;
192    ///
193    /// let witness = WitnessHandle::new(42, RoaringBitmap::from_iter([42u32]), 1);
194    /// assert_eq!(witness.seed(), 42);
195    /// ```
196    #[inline]
197    pub fn seed(&self) -> VertexId {
198        self.inner.seed
199    }
200
201    /// Get the witness hash
202    ///
203    /// Used for fast equality checks without comparing full membership sets.
204    #[inline]
205    pub fn hash(&self) -> u64 {
206        self.inner.hash
207    }
208
209    /// Materialize full partition (U, V \ U)
210    ///
211    /// This is an expensive operation (O(|V|)) that converts the implicit
212    /// representation into explicit sets. Use sparingly, primarily for
213    /// debugging or verification.
214    ///
215    /// # Returns
216    ///
217    /// A tuple `(U, V_minus_U)` where:
218    /// - `U` is the set of vertices in the cut
219    /// - `V_minus_U` is the complement set
220    ///
221    /// # Note
222    ///
223    /// This method assumes vertices are numbered 0..max_vertex. For sparse
224    /// graphs, V \ U may contain vertex IDs that don't exist in the graph.
225    ///
226    /// # Examples
227    ///
228    /// ```
229    /// use ruvector_mincut::instance::witness::WitnessHandle;
230    /// use roaring::RoaringBitmap;
231    /// use std::collections::HashSet;
232    ///
233    /// let mut membership = RoaringBitmap::new();
234    /// membership.insert(1);
235    /// membership.insert(2);
236    ///
237    /// let witness = WitnessHandle::new(1, membership, 3);
238    /// let (u, _v_minus_u) = witness.materialize_partition();
239    ///
240    /// assert!(u.contains(&1));
241    /// assert!(u.contains(&2));
242    /// assert!(!u.contains(&3));
243    /// ```
244    pub fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>) {
245        let u: HashSet<VertexId> = self.inner.membership.iter().map(|v| v as u64).collect();
246
247        // Find the maximum vertex ID to determine graph size
248        let max_vertex = self.inner.membership.max().unwrap_or(0) as u64;
249
250        // Create complement set
251        let v_minus_u: HashSet<VertexId> = (0..=max_vertex)
252            .filter(|&v| !self.inner.membership.contains(v as u32))
253            .collect();
254
255        (u, v_minus_u)
256    }
257
258    /// Get the cardinality of the cut set U
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// use ruvector_mincut::instance::witness::WitnessHandle;
264    /// use roaring::RoaringBitmap;
265    ///
266    /// let witness = WitnessHandle::new(1, RoaringBitmap::from_iter([1u32, 2u32, 3u32]), 5);
267    /// assert_eq!(witness.cardinality(), 3);
268    /// ```
269    #[inline]
270    pub fn cardinality(&self) -> u64 {
271        self.inner.membership.len()
272    }
273}
274
275impl PartialEq for WitnessHandle {
276    /// Fast equality check using hash
277    ///
278    /// First compares hashes (O(1)), then falls back to full comparison if needed.
279    fn eq(&self, other: &Self) -> bool {
280        // Fast path: compare hashes
281        if self.inner.hash != other.inner.hash {
282            return false;
283        }
284
285        // Slow path: compare actual membership
286        self.inner.seed == other.inner.seed
287            && self.inner.boundary_size == other.inner.boundary_size
288            && self.inner.membership == other.inner.membership
289    }
290}
291
292impl Eq for WitnessHandle {}
293
294/// Trait for witness operations
295///
296/// This trait abstracts witness operations for generic programming.
297/// The primary implementation is `WitnessHandle`.
298pub trait Witness {
299    /// Check if vertex is in the cut set U
300    fn contains(&self, v: VertexId) -> bool;
301
302    /// Get boundary size |δ(U)|
303    fn boundary_size(&self) -> u64;
304
305    /// Materialize full partition (expensive)
306    fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>);
307
308    /// Get the seed vertex
309    fn seed(&self) -> VertexId;
310
311    /// Get cardinality of U
312    fn cardinality(&self) -> u64;
313}
314
315impl Witness for WitnessHandle {
316    #[inline]
317    fn contains(&self, v: VertexId) -> bool {
318        WitnessHandle::contains(self, v)
319    }
320
321    #[inline]
322    fn boundary_size(&self) -> u64 {
323        WitnessHandle::boundary_size(self)
324    }
325
326    fn materialize_partition(&self) -> (HashSet<VertexId>, HashSet<VertexId>) {
327        WitnessHandle::materialize_partition(self)
328    }
329
330    #[inline]
331    fn seed(&self) -> VertexId {
332        WitnessHandle::seed(self)
333    }
334
335    #[inline]
336    fn cardinality(&self) -> u64 {
337        WitnessHandle::cardinality(self)
338    }
339}
340
341/// Recipe for constructing a witness lazily
342///
343/// Instead of computing the full membership bitmap upfront, this struct
344/// stores the parameters needed to construct it on demand. This is useful
345/// when you need to store many potential witnesses but only access a few.
346///
347/// # Lazy Evaluation
348///
349/// The membership bitmap is only computed when:
350/// - `materialize()` is called to get a full `WitnessHandle`
351/// - `contains()` is called to check vertex membership
352/// - Any other operation that requires the full witness
353///
354/// # Memory Savings
355///
356/// A `LazyWitness` uses only ~32 bytes vs potentially kilobytes for a
357/// `WitnessHandle` with a large membership bitmap.
358///
359/// # Example
360///
361/// ```
362/// use ruvector_mincut::instance::witness::LazyWitness;
363///
364/// // Create a lazy witness recipe
365/// let lazy = LazyWitness::new(42, 10, 5);
366///
367/// // No computation happens until materialized
368/// assert_eq!(lazy.seed(), 42);
369/// assert_eq!(lazy.boundary_size(), 5);
370///
371/// // Calling with_adjacency materializes the witness
372/// // (requires adjacency data from the graph)
373/// ```
374#[derive(Debug, Clone)]
375pub struct LazyWitness {
376    /// Seed vertex that defines the cut
377    seed: VertexId,
378    /// Radius of the local search that found this witness
379    radius: usize,
380    /// Pre-computed boundary size
381    boundary_size: u64,
382    /// Cached materialized witness (computed on first access)
383    cached: std::sync::OnceLock<WitnessHandle>,
384}
385
386impl LazyWitness {
387    /// Create a new lazy witness
388    ///
389    /// # Arguments
390    ///
391    /// * `seed` - Seed vertex defining the cut
392    /// * `radius` - Radius of local search used
393    /// * `boundary_size` - Pre-computed boundary size
394    pub fn new(seed: VertexId, radius: usize, boundary_size: u64) -> Self {
395        Self {
396            seed,
397            radius,
398            boundary_size,
399            cached: std::sync::OnceLock::new(),
400        }
401    }
402
403    /// Get the seed vertex
404    #[inline]
405    pub fn seed(&self) -> VertexId {
406        self.seed
407    }
408
409    /// Get the search radius
410    #[inline]
411    pub fn radius(&self) -> usize {
412        self.radius
413    }
414
415    /// Get boundary size |δ(U)|
416    #[inline]
417    pub fn boundary_size(&self) -> u64 {
418        self.boundary_size
419    }
420
421    /// Check if the witness has been materialized
422    #[inline]
423    pub fn is_materialized(&self) -> bool {
424        self.cached.get().is_some()
425    }
426
427    /// Materialize the witness with adjacency information
428    ///
429    /// This performs a BFS from the seed vertex up to the given radius
430    /// to construct the full membership bitmap.
431    ///
432    /// # Arguments
433    ///
434    /// * `adjacency` - Function to get neighbors of a vertex
435    ///
436    /// # Returns
437    ///
438    /// A fully materialized `WitnessHandle`
439    pub fn materialize<F>(&self, adjacency: F) -> WitnessHandle
440    where
441        F: Fn(VertexId) -> Vec<VertexId>,
442    {
443        self.cached.get_or_init(|| {
444            // BFS from seed up to radius
445            let mut membership = RoaringBitmap::new();
446            let mut visited = HashSet::new();
447            let mut queue = std::collections::VecDeque::new();
448
449            queue.push_back((self.seed, 0usize));
450            visited.insert(self.seed);
451            membership.insert(self.seed as u32);
452
453            while let Some((vertex, dist)) = queue.pop_front() {
454                if dist >= self.radius {
455                    continue;
456                }
457
458                for neighbor in adjacency(vertex) {
459                    if visited.insert(neighbor) {
460                        membership.insert(neighbor as u32);
461                        queue.push_back((neighbor, dist + 1));
462                    }
463                }
464            }
465
466            WitnessHandle::new(self.seed, membership, self.boundary_size)
467        }).clone()
468    }
469
470    /// Set a pre-computed witness (for cases where we already have it)
471    pub fn set_materialized(&self, witness: WitnessHandle) {
472        let _ = self.cached.set(witness);
473    }
474
475    /// Get the cached witness if already materialized
476    pub fn get_cached(&self) -> Option<&WitnessHandle> {
477        self.cached.get()
478    }
479}
480
481/// Batch of lazy witnesses for efficient storage
482///
483/// Stores multiple lazy witnesses compactly and tracks which
484/// have been materialized.
485#[derive(Debug, Default)]
486pub struct LazyWitnessBatch {
487    /// Lazy witnesses in this batch
488    witnesses: Vec<LazyWitness>,
489    /// Count of materialized witnesses
490    materialized_count: std::sync::atomic::AtomicUsize,
491}
492
493impl LazyWitnessBatch {
494    /// Create a new empty batch
495    pub fn new() -> Self {
496        Self {
497            witnesses: Vec::new(),
498            materialized_count: std::sync::atomic::AtomicUsize::new(0),
499        }
500    }
501
502    /// Create batch with capacity
503    pub fn with_capacity(capacity: usize) -> Self {
504        Self {
505            witnesses: Vec::with_capacity(capacity),
506            materialized_count: std::sync::atomic::AtomicUsize::new(0),
507        }
508    }
509
510    /// Add a lazy witness to the batch
511    pub fn push(&mut self, witness: LazyWitness) {
512        self.witnesses.push(witness);
513    }
514
515    /// Get witness by index
516    pub fn get(&self, index: usize) -> Option<&LazyWitness> {
517        self.witnesses.get(index)
518    }
519
520    /// Number of witnesses in batch
521    pub fn len(&self) -> usize {
522        self.witnesses.len()
523    }
524
525    /// Check if batch is empty
526    pub fn is_empty(&self) -> bool {
527        self.witnesses.is_empty()
528    }
529
530    /// Count of materialized witnesses
531    pub fn materialized_count(&self) -> usize {
532        self.materialized_count.load(std::sync::atomic::Ordering::Relaxed)
533    }
534
535    /// Materialize a specific witness
536    pub fn materialize<F>(&self, index: usize, adjacency: F) -> Option<WitnessHandle>
537    where
538        F: Fn(VertexId) -> Vec<VertexId>,
539    {
540        self.witnesses.get(index).map(|lazy| {
541            let was_materialized = lazy.is_materialized();
542            let handle = lazy.materialize(adjacency);
543            if !was_materialized {
544                self.materialized_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
545            }
546            handle
547        })
548    }
549
550    /// Find witness with smallest boundary (materializes only as needed)
551    pub fn find_smallest_boundary(&self) -> Option<&LazyWitness> {
552        self.witnesses.iter().min_by_key(|w| w.boundary_size())
553    }
554
555    /// Iterate over all lazy witnesses
556    pub fn iter(&self) -> impl Iterator<Item = &LazyWitness> {
557        self.witnesses.iter()
558    }
559}
560
561#[cfg(test)]
562mod lazy_tests {
563    use super::*;
564
565    #[test]
566    fn test_lazy_witness_new() {
567        let lazy = LazyWitness::new(42, 5, 10);
568        assert_eq!(lazy.seed(), 42);
569        assert_eq!(lazy.radius(), 5);
570        assert_eq!(lazy.boundary_size(), 10);
571        assert!(!lazy.is_materialized());
572    }
573
574    #[test]
575    fn test_lazy_witness_materialize() {
576        let lazy = LazyWitness::new(0, 2, 3);
577
578        // Simple adjacency: linear graph 0-1-2-3-4
579        let adjacency = |v: VertexId| -> Vec<VertexId> {
580            match v {
581                0 => vec![1],
582                1 => vec![0, 2],
583                2 => vec![1, 3],
584                3 => vec![2, 4],
585                4 => vec![3],
586                _ => vec![],
587            }
588        };
589
590        let handle = lazy.materialize(adjacency);
591
592        // With radius 2 from vertex 0, should include 0, 1, 2
593        assert!(handle.contains(0));
594        assert!(handle.contains(1));
595        assert!(handle.contains(2));
596        assert!(!handle.contains(3)); // Beyond radius 2
597        assert!(lazy.is_materialized());
598    }
599
600    #[test]
601    fn test_lazy_witness_caching() {
602        let lazy = LazyWitness::new(0, 1, 5);
603
604        let call_count = std::sync::atomic::AtomicUsize::new(0);
605        let adjacency = |v: VertexId| -> Vec<VertexId> {
606            call_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
607            if v == 0 { vec![1, 2] } else { vec![] }
608        };
609
610        // First materialization
611        let _h1 = lazy.materialize(&adjacency);
612        let first_count = call_count.load(std::sync::atomic::Ordering::Relaxed);
613
614        // Second materialization should use cache
615        let _h2 = lazy.materialize(&adjacency);
616        let second_count = call_count.load(std::sync::atomic::Ordering::Relaxed);
617
618        // Adjacency should only be called during first materialization
619        assert_eq!(first_count, second_count);
620    }
621
622    #[test]
623    fn test_lazy_witness_batch() {
624        let mut batch = LazyWitnessBatch::with_capacity(3);
625
626        batch.push(LazyWitness::new(0, 2, 5));
627        batch.push(LazyWitness::new(1, 3, 3)); // Smallest boundary
628        batch.push(LazyWitness::new(2, 1, 7));
629
630        assert_eq!(batch.len(), 3);
631        assert_eq!(batch.materialized_count(), 0);
632
633        // Find smallest boundary
634        let smallest = batch.find_smallest_boundary().unwrap();
635        assert_eq!(smallest.seed(), 1);
636        assert_eq!(smallest.boundary_size(), 3);
637    }
638
639    #[test]
640    fn test_batch_materialize() {
641        let mut batch = LazyWitnessBatch::new();
642        batch.push(LazyWitness::new(0, 1, 5));
643
644        let adjacency = |_v: VertexId| -> Vec<VertexId> { vec![1, 2] };
645
646        let handle = batch.materialize(0, adjacency).unwrap();
647        assert!(handle.contains(0));
648        assert!(handle.contains(1));
649        assert!(handle.contains(2));
650
651        assert_eq!(batch.materialized_count(), 1);
652    }
653}