Skip to main content

provenant/license_detection/
position_set.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use bit_set::BitSet;
5
6use crate::license_detection::models::position_span::PositionSpan;
7
8/// A set of usize positions stored as a BitSet.
9/// Provides O(1) membership testing and efficient set operations.
10/// Caches bounds for cheap overlap pre-checks.
11#[derive(Clone, Debug, PartialEq, Eq)]
12pub struct PositionSet {
13    bitset: BitSet,
14    min_pos: usize,
15    max_pos: usize,
16}
17
18impl PositionSet {
19    /// Create a PositionSet from an iterator of usize positions.
20    pub fn from_usize_iter<I: IntoIterator<Item = usize>>(iter: I) -> Self {
21        let mut bitset = BitSet::new();
22        let mut min_pos = usize::MAX;
23        let mut max_pos = 0;
24
25        for pos in iter {
26            bitset.insert(pos);
27            min_pos = min_pos.min(pos);
28            max_pos = max_pos.max(pos);
29        }
30
31        Self {
32            bitset,
33            min_pos,
34            max_pos,
35        }
36    }
37
38    /// Create an empty PositionSet.
39    pub fn new() -> Self {
40        Self {
41            bitset: BitSet::new(),
42            min_pos: usize::MAX,
43            max_pos: 0,
44        }
45    }
46
47    /// Number of positions in the set.
48    pub fn len(&self) -> usize {
49        self.bitset.count()
50    }
51
52    /// Is the set empty?
53    pub fn is_empty(&self) -> bool {
54        self.bitset.is_empty()
55    }
56
57    /// Returns the minimum position in the set.
58    ///
59    /// Returns `usize::MAX` for an empty set.
60    pub fn min_pos(&self) -> usize {
61        self.min_pos
62    }
63
64    /// Returns the maximum position in the set.
65    ///
66    /// Returns `0` for an empty set.
67    pub fn max_pos(&self) -> usize {
68        self.max_pos
69    }
70
71    /// Insert a position.
72    pub fn insert(&mut self, pos: usize) -> bool {
73        let inserted = self.bitset.insert(pos);
74        if inserted {
75            self.min_pos = self.min_pos.min(pos);
76            self.max_pos = self.max_pos.max(pos);
77        }
78        inserted
79    }
80
81    /// Extend this set from a PositionSpan without allocating an intermediate set.
82    pub fn extend_from_span(&mut self, span: &PositionSpan) {
83        match span {
84            PositionSpan::Range { start, end } => {
85                for pos in *start..*end {
86                    self.insert(pos);
87                }
88            }
89            PositionSpan::Discrete(positions) => {
90                for &pos in positions {
91                    self.insert(pos);
92                }
93            }
94        }
95    }
96
97    /// Check if position is in the set.
98    pub fn contains(&self, pos: usize) -> bool {
99        self.bitset.contains(pos)
100    }
101
102    /// Remove a position from the set.
103    pub fn remove(&mut self, pos: usize) -> bool {
104        self.bitset.remove(pos)
105    }
106
107    /// Remove all positions in a span from the set.
108    pub fn remove_span(&mut self, span: &PositionSpan) {
109        for pos in span.iter() {
110            self.remove(pos);
111        }
112    }
113
114    /// Quick check if a range [range_start, range_end) might overlap with this set.
115    /// Returns true if the bounding boxes overlap, false if they definitely don't.
116    /// This is O(1) and used as a pre-filter before the expensive BitSet check.
117    #[inline]
118    pub fn may_overlap_range(&self, range_start: usize, range_end: usize) -> bool {
119        // min_pos == usize::MAX means empty set (see new())
120        if self.min_pos == usize::MAX {
121            return false;
122        }
123        range_end > self.min_pos && range_start <= self.max_pos
124    }
125
126    /// Compute the union of this set with another PositionSet.
127    ///
128    /// Returns a new PositionSet containing all positions from both sets.
129    pub fn union(&self, other: &PositionSet) -> PositionSet {
130        let mut result = self.clone();
131        for pos in other.iter() {
132            result.insert(pos);
133        }
134        result
135    }
136
137    /// Return the difference (elements in self but not in other).
138    pub fn difference(&self, other: &PositionSet) -> PositionSet {
139        let mut result = PositionSet::new();
140        for pos in self.bitset.iter() {
141            if !other.bitset.contains(pos) {
142                result.insert(pos);
143            }
144        }
145        result
146    }
147
148    /// Count elements in the intersection of self and other.
149    pub fn intersection_len(&self, other: &PositionSet) -> usize {
150        self.bitset
151            .iter()
152            .filter(|&p| other.bitset.contains(p))
153            .count()
154    }
155
156    /// Check if this set overlaps with a PositionSpan.
157    /// Uses O(1) bounds check before the O(n) element-wise check.
158    pub fn overlaps_span(&self, span: &PositionSpan) -> bool {
159        let (span_min, span_max) = span.bounds();
160        if span.is_empty() {
161            return false;
162        }
163        if !self.may_overlap_range(span_min, span_max) {
164            return false;
165        }
166        span.iter().any(|p| self.contains(p))
167    }
168
169    /// Check if this set contains all positions in a range.
170    /// Returns true for empty ranges.
171    pub fn contains_range(&self, range: std::ops::Range<usize>) -> bool {
172        if range.is_empty() {
173            return true;
174        }
175        let (start, end) = (range.start, range.end);
176        if !self.may_overlap_range(start, end) {
177            return false;
178        }
179        (start..end).all(|pos| self.contains(pos))
180    }
181
182    /// Iterate over positions.
183    pub fn iter(&self) -> impl Iterator<Item = usize> + '_ {
184        self.bitset.iter()
185    }
186
187    /// Convert this PositionSet to a PositionSpan.
188    ///
189    /// If positions are contiguous, returns a Range; otherwise returns Discrete.
190    pub fn to_position_span(&self) -> PositionSpan {
191        if self.is_empty() {
192            return PositionSpan::empty();
193        }
194
195        let positions: Vec<usize> = self.iter().collect();
196        let is_contiguous = positions.windows(2).all(|w| w[1] == w[0] + 1);
197
198        if is_contiguous {
199            PositionSpan::range(self.min_pos, self.max_pos + 1)
200        } else {
201            PositionSpan::from_positions(positions)
202        }
203    }
204}
205
206impl Default for PositionSet {
207    fn default() -> Self {
208        Self::new()
209    }
210}
211
212impl std::iter::FromIterator<usize> for PositionSet {
213    fn from_iter<T: IntoIterator<Item = usize>>(iter: T) -> Self {
214        Self::from_usize_iter(iter)
215    }
216}
217
218#[cfg(test)]
219mod tests {
220    use super::*;
221
222    #[test]
223    fn test_new_empty() {
224        let set = PositionSet::new();
225        assert!(set.is_empty());
226        assert_eq!(set.len(), 0);
227    }
228
229    #[test]
230    fn test_from_usize_iter_sorted() {
231        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
232        assert_eq!(set.len(), 3);
233        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
234    }
235
236    #[test]
237    fn test_from_usize_iter_unsorted() {
238        let set = PositionSet::from_usize_iter(vec![3, 1, 2]);
239        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
240    }
241
242    #[test]
243    fn test_from_usize_iter_dedup() {
244        let set = PositionSet::from_usize_iter(vec![1, 2, 2, 3, 3, 3]);
245        assert_eq!(set.len(), 3);
246        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
247    }
248
249    #[test]
250    fn test_insert() {
251        let mut set = PositionSet::new();
252        assert!(set.insert(2));
253        assert!(set.insert(1));
254        assert!(set.insert(3));
255        assert!(!set.insert(2)); // Already present
256        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
257    }
258
259    #[test]
260    fn test_difference() {
261        let a = PositionSet::from_usize_iter(vec![1, 2, 3, 4]);
262        let b = PositionSet::from_usize_iter(vec![2, 4, 6]);
263        let diff = a.difference(&b);
264        assert_eq!(diff.iter().collect::<Vec<_>>(), vec![1, 3]);
265    }
266
267    #[test]
268    fn test_difference_empty() {
269        let a = PositionSet::from_usize_iter(vec![1, 2, 3]);
270        let b = PositionSet::new();
271        let diff = a.difference(&b);
272        assert_eq!(diff.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
273    }
274
275    #[test]
276    fn test_difference_all_overlap() {
277        let a = PositionSet::from_usize_iter(vec![1, 2, 3]);
278        let b = PositionSet::from_usize_iter(vec![1, 2, 3]);
279        let diff = a.difference(&b);
280        assert!(diff.is_empty());
281    }
282
283    #[test]
284    fn test_contains() {
285        let set = PositionSet::from_usize_iter(vec![1, 3, 5]);
286        assert!(set.contains(1));
287        assert!(set.contains(3));
288        assert!(set.contains(5));
289        assert!(!set.contains(0));
290        assert!(!set.contains(2));
291        assert!(!set.contains(4));
292    }
293
294    #[test]
295    fn test_collect() {
296        let set: PositionSet = vec![3, 1, 2].into_iter().collect();
297        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
298    }
299
300    #[test]
301    fn test_extend_from_span_range() {
302        let mut set = PositionSet::new();
303        set.extend_from_span(&PositionSpan::range(5, 10));
304        assert_eq!(set.len(), 5);
305        assert!(set.contains(5));
306        assert!(set.contains(9));
307        assert!(!set.contains(4));
308        assert!(!set.contains(10));
309    }
310
311    #[test]
312    fn test_extend_from_span_discrete() {
313        let mut set = PositionSet::new();
314        set.extend_from_span(&PositionSpan::from_positions(vec![1, 3, 5]));
315        assert_eq!(set.len(), 3);
316        assert!(set.contains(1));
317        assert!(set.contains(3));
318        assert!(set.contains(5));
319        assert!(!set.contains(2));
320    }
321
322    #[test]
323    fn test_extend_from_span_merge() {
324        let mut set = PositionSet::from_usize_iter(vec![1, 2, 3]);
325        set.extend_from_span(&PositionSpan::range(2, 6));
326        assert_eq!(set.len(), 5);
327        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3, 4, 5]);
328    }
329
330    #[test]
331    fn test_overlaps_span_range_yes() {
332        let set = PositionSet::from_usize_iter(vec![5, 6, 7]);
333        assert!(set.overlaps_span(&PositionSpan::range(6, 10)));
334        assert!(set.overlaps_span(&PositionSpan::range(0, 6)));
335    }
336
337    #[test]
338    fn test_overlaps_span_range_no() {
339        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
340        assert!(!set.overlaps_span(&PositionSpan::range(5, 10)));
341        assert!(!set.overlaps_span(&PositionSpan::range(10, 20)));
342    }
343
344    #[test]
345    fn test_overlaps_span_discrete_yes() {
346        let set = PositionSet::from_usize_iter(vec![1, 2, 3, 10, 11]);
347        assert!(set.overlaps_span(&PositionSpan::from_positions(vec![3, 4, 5])));
348        assert!(set.overlaps_span(&PositionSpan::from_positions(vec![0, 1])));
349    }
350
351    #[test]
352    fn test_overlaps_span_discrete_no() {
353        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
354        assert!(!set.overlaps_span(&PositionSpan::from_positions(vec![5, 6, 7])));
355    }
356
357    #[test]
358    fn test_overlaps_span_empty() {
359        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
360        assert!(!set.overlaps_span(&PositionSpan::empty()));
361    }
362
363    #[test]
364    fn test_contains_range_yes() {
365        let set = PositionSet::from_usize_iter(vec![1, 2, 3, 4, 5]);
366        assert!(set.contains_range(1..6));
367        assert!(set.contains_range(2..4));
368        assert!(set.contains_range(1..6));
369    }
370
371    #[test]
372    fn test_contains_range_no() {
373        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
374        assert!(!set.contains_range(0..4));
375        assert!(!set.contains_range(3..5));
376        assert!(!set.contains_range(5..10));
377    }
378
379    #[test]
380    fn test_contains_range_empty() {
381        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
382        assert!(set.contains_range(5..5));
383        assert!(set.contains_range(0..0));
384    }
385
386    #[test]
387    fn test_contains_range_disjoint() {
388        let set = PositionSet::from_usize_iter(vec![10, 11, 12]);
389        assert!(!set.contains_range(0..5));
390        assert!(!set.contains_range(15..20));
391    }
392
393    #[test]
394    fn test_to_position_span_empty() {
395        let set = PositionSet::new();
396        let span = set.to_position_span();
397        assert!(span.is_empty());
398    }
399
400    #[test]
401    fn test_to_position_span_contiguous() {
402        let set = PositionSet::from_usize_iter(vec![5, 6, 7, 8]);
403        let span = set.to_position_span();
404        assert_eq!(span, PositionSpan::range(5, 9));
405    }
406
407    #[test]
408    fn test_to_position_span_single() {
409        let set = PositionSet::from_usize_iter(vec![10]);
410        let span = set.to_position_span();
411        assert_eq!(span, PositionSpan::range(10, 11));
412    }
413
414    #[test]
415    fn test_to_position_span_discrete() {
416        let set = PositionSet::from_usize_iter(vec![1, 3, 5, 7]);
417        let span = set.to_position_span();
418        assert_eq!(span, PositionSpan::from_positions(vec![1, 3, 5, 7]));
419    }
420
421    #[test]
422    fn test_to_position_span_two_with_gap() {
423        let set = PositionSet::from_usize_iter(vec![1, 3]);
424        let span = set.to_position_span();
425        assert_eq!(span, PositionSpan::from_positions(vec![1, 3]));
426    }
427
428    #[test]
429    fn test_min_max_pos() {
430        let set = PositionSet::from_usize_iter(vec![5, 10, 15]);
431        assert_eq!(set.min_pos(), 5);
432        assert_eq!(set.max_pos(), 15);
433    }
434
435    #[test]
436    fn test_min_max_pos_empty() {
437        let set = PositionSet::new();
438        assert_eq!(set.min_pos(), usize::MAX);
439        assert_eq!(set.max_pos(), 0);
440    }
441}