Skip to main content

provenant/license_detection/
position_set.rs

1use bit_set::BitSet;
2
3use crate::license_detection::models::position_span::PositionSpan;
4
5/// A set of usize positions stored as a BitSet.
6/// Provides O(1) membership testing and efficient set operations.
7/// Caches bounds for cheap overlap pre-checks.
8#[derive(Clone, Debug, PartialEq, Eq)]
9pub struct PositionSet {
10    bitset: BitSet,
11    min_pos: usize,
12    max_pos: usize,
13}
14
15impl PositionSet {
16    /// Create a PositionSet from an iterator of usize positions.
17    pub fn from_usize_iter<I: IntoIterator<Item = usize>>(iter: I) -> Self {
18        let mut bitset = BitSet::new();
19        let mut min_pos = usize::MAX;
20        let mut max_pos = 0;
21
22        for pos in iter {
23            bitset.insert(pos);
24            min_pos = min_pos.min(pos);
25            max_pos = max_pos.max(pos);
26        }
27
28        Self {
29            bitset,
30            min_pos,
31            max_pos,
32        }
33    }
34
35    /// Create an empty PositionSet.
36    pub fn new() -> Self {
37        Self {
38            bitset: BitSet::new(),
39            min_pos: usize::MAX,
40            max_pos: 0,
41        }
42    }
43
44    /// Number of positions in the set.
45    pub fn len(&self) -> usize {
46        self.bitset.count()
47    }
48
49    /// Is the set empty?
50    pub fn is_empty(&self) -> bool {
51        self.bitset.is_empty()
52    }
53
54    /// Returns the minimum position in the set.
55    ///
56    /// Returns `usize::MAX` for an empty set.
57    pub fn min_pos(&self) -> usize {
58        self.min_pos
59    }
60
61    /// Returns the maximum position in the set.
62    ///
63    /// Returns `0` for an empty set.
64    pub fn max_pos(&self) -> usize {
65        self.max_pos
66    }
67
68    /// Insert a position.
69    pub fn insert(&mut self, pos: usize) -> bool {
70        let inserted = self.bitset.insert(pos);
71        if inserted {
72            self.min_pos = self.min_pos.min(pos);
73            self.max_pos = self.max_pos.max(pos);
74        }
75        inserted
76    }
77
78    /// Extend this set from a PositionSpan without allocating an intermediate set.
79    pub fn extend_from_span(&mut self, span: &PositionSpan) {
80        match span {
81            PositionSpan::Range { start, end } => {
82                for pos in *start..*end {
83                    self.insert(pos);
84                }
85            }
86            PositionSpan::Discrete(positions) => {
87                for &pos in positions {
88                    self.insert(pos);
89                }
90            }
91        }
92    }
93
94    /// Check if position is in the set.
95    pub fn contains(&self, pos: usize) -> bool {
96        self.bitset.contains(pos)
97    }
98
99    /// Remove a position from the set.
100    pub fn remove(&mut self, pos: usize) -> bool {
101        self.bitset.remove(pos)
102    }
103
104    /// Remove all positions in a span from the set.
105    pub fn remove_span(&mut self, span: &PositionSpan) {
106        for pos in span.iter() {
107            self.remove(pos);
108        }
109    }
110
111    /// Quick check if a range [range_start, range_end) might overlap with this set.
112    /// Returns true if the bounding boxes overlap, false if they definitely don't.
113    /// This is O(1) and used as a pre-filter before the expensive BitSet check.
114    #[inline]
115    pub fn may_overlap_range(&self, range_start: usize, range_end: usize) -> bool {
116        // min_pos == usize::MAX means empty set (see new())
117        if self.min_pos == usize::MAX {
118            return false;
119        }
120        range_end > self.min_pos && range_start <= self.max_pos
121    }
122
123    /// Compute the union of this set with another PositionSet.
124    ///
125    /// Returns a new PositionSet containing all positions from both sets.
126    pub fn union(&self, other: &PositionSet) -> PositionSet {
127        let mut result = self.clone();
128        for pos in other.iter() {
129            result.insert(pos);
130        }
131        result
132    }
133
134    /// Return the difference (elements in self but not in other).
135    pub fn difference(&self, other: &PositionSet) -> PositionSet {
136        let mut result = PositionSet::new();
137        for pos in self.bitset.iter() {
138            if !other.bitset.contains(pos) {
139                result.insert(pos);
140            }
141        }
142        result
143    }
144
145    /// Count elements in the intersection of self and other.
146    pub fn intersection_len(&self, other: &PositionSet) -> usize {
147        self.bitset
148            .iter()
149            .filter(|&p| other.bitset.contains(p))
150            .count()
151    }
152
153    /// Check if this set overlaps with a PositionSpan.
154    /// Uses O(1) bounds check before the O(n) element-wise check.
155    pub fn overlaps_span(&self, span: &PositionSpan) -> bool {
156        let (span_min, span_max) = span.bounds();
157        if span.is_empty() {
158            return false;
159        }
160        if !self.may_overlap_range(span_min, span_max) {
161            return false;
162        }
163        span.iter().any(|p| self.contains(p))
164    }
165
166    /// Check if this set contains all positions in a range.
167    /// Returns true for empty ranges.
168    pub fn contains_range(&self, range: std::ops::Range<usize>) -> bool {
169        if range.is_empty() {
170            return true;
171        }
172        let (start, end) = (range.start, range.end);
173        if !self.may_overlap_range(start, end) {
174            return false;
175        }
176        (start..end).all(|pos| self.contains(pos))
177    }
178
179    /// Iterate over positions.
180    pub fn iter(&self) -> impl Iterator<Item = usize> + '_ {
181        self.bitset.iter()
182    }
183
184    /// Convert this PositionSet to a PositionSpan.
185    ///
186    /// If positions are contiguous, returns a Range; otherwise returns Discrete.
187    pub fn to_position_span(&self) -> PositionSpan {
188        if self.is_empty() {
189            return PositionSpan::empty();
190        }
191
192        let positions: Vec<usize> = self.iter().collect();
193        let is_contiguous = positions.windows(2).all(|w| w[1] == w[0] + 1);
194
195        if is_contiguous {
196            PositionSpan::range(self.min_pos, self.max_pos + 1)
197        } else {
198            PositionSpan::from_positions(positions)
199        }
200    }
201}
202
203impl Default for PositionSet {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209impl std::iter::FromIterator<usize> for PositionSet {
210    fn from_iter<T: IntoIterator<Item = usize>>(iter: T) -> Self {
211        Self::from_usize_iter(iter)
212    }
213}
214
215#[cfg(test)]
216mod tests {
217    use super::*;
218
219    #[test]
220    fn test_new_empty() {
221        let set = PositionSet::new();
222        assert!(set.is_empty());
223        assert_eq!(set.len(), 0);
224    }
225
226    #[test]
227    fn test_from_usize_iter_sorted() {
228        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
229        assert_eq!(set.len(), 3);
230        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
231    }
232
233    #[test]
234    fn test_from_usize_iter_unsorted() {
235        let set = PositionSet::from_usize_iter(vec![3, 1, 2]);
236        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
237    }
238
239    #[test]
240    fn test_from_usize_iter_dedup() {
241        let set = PositionSet::from_usize_iter(vec![1, 2, 2, 3, 3, 3]);
242        assert_eq!(set.len(), 3);
243        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
244    }
245
246    #[test]
247    fn test_insert() {
248        let mut set = PositionSet::new();
249        assert!(set.insert(2));
250        assert!(set.insert(1));
251        assert!(set.insert(3));
252        assert!(!set.insert(2)); // Already present
253        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
254    }
255
256    #[test]
257    fn test_difference() {
258        let a = PositionSet::from_usize_iter(vec![1, 2, 3, 4]);
259        let b = PositionSet::from_usize_iter(vec![2, 4, 6]);
260        let diff = a.difference(&b);
261        assert_eq!(diff.iter().collect::<Vec<_>>(), vec![1, 3]);
262    }
263
264    #[test]
265    fn test_difference_empty() {
266        let a = PositionSet::from_usize_iter(vec![1, 2, 3]);
267        let b = PositionSet::new();
268        let diff = a.difference(&b);
269        assert_eq!(diff.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
270    }
271
272    #[test]
273    fn test_difference_all_overlap() {
274        let a = PositionSet::from_usize_iter(vec![1, 2, 3]);
275        let b = PositionSet::from_usize_iter(vec![1, 2, 3]);
276        let diff = a.difference(&b);
277        assert!(diff.is_empty());
278    }
279
280    #[test]
281    fn test_contains() {
282        let set = PositionSet::from_usize_iter(vec![1, 3, 5]);
283        assert!(set.contains(1));
284        assert!(set.contains(3));
285        assert!(set.contains(5));
286        assert!(!set.contains(0));
287        assert!(!set.contains(2));
288        assert!(!set.contains(4));
289    }
290
291    #[test]
292    fn test_collect() {
293        let set: PositionSet = vec![3, 1, 2].into_iter().collect();
294        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
295    }
296
297    #[test]
298    fn test_extend_from_span_range() {
299        let mut set = PositionSet::new();
300        set.extend_from_span(&PositionSpan::range(5, 10));
301        assert_eq!(set.len(), 5);
302        assert!(set.contains(5));
303        assert!(set.contains(9));
304        assert!(!set.contains(4));
305        assert!(!set.contains(10));
306    }
307
308    #[test]
309    fn test_extend_from_span_discrete() {
310        let mut set = PositionSet::new();
311        set.extend_from_span(&PositionSpan::from_positions(vec![1, 3, 5]));
312        assert_eq!(set.len(), 3);
313        assert!(set.contains(1));
314        assert!(set.contains(3));
315        assert!(set.contains(5));
316        assert!(!set.contains(2));
317    }
318
319    #[test]
320    fn test_extend_from_span_merge() {
321        let mut set = PositionSet::from_usize_iter(vec![1, 2, 3]);
322        set.extend_from_span(&PositionSpan::range(2, 6));
323        assert_eq!(set.len(), 5);
324        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3, 4, 5]);
325    }
326
327    #[test]
328    fn test_overlaps_span_range_yes() {
329        let set = PositionSet::from_usize_iter(vec![5, 6, 7]);
330        assert!(set.overlaps_span(&PositionSpan::range(6, 10)));
331        assert!(set.overlaps_span(&PositionSpan::range(0, 6)));
332    }
333
334    #[test]
335    fn test_overlaps_span_range_no() {
336        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
337        assert!(!set.overlaps_span(&PositionSpan::range(5, 10)));
338        assert!(!set.overlaps_span(&PositionSpan::range(10, 20)));
339    }
340
341    #[test]
342    fn test_overlaps_span_discrete_yes() {
343        let set = PositionSet::from_usize_iter(vec![1, 2, 3, 10, 11]);
344        assert!(set.overlaps_span(&PositionSpan::from_positions(vec![3, 4, 5])));
345        assert!(set.overlaps_span(&PositionSpan::from_positions(vec![0, 1])));
346    }
347
348    #[test]
349    fn test_overlaps_span_discrete_no() {
350        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
351        assert!(!set.overlaps_span(&PositionSpan::from_positions(vec![5, 6, 7])));
352    }
353
354    #[test]
355    fn test_overlaps_span_empty() {
356        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
357        assert!(!set.overlaps_span(&PositionSpan::empty()));
358    }
359
360    #[test]
361    fn test_contains_range_yes() {
362        let set = PositionSet::from_usize_iter(vec![1, 2, 3, 4, 5]);
363        assert!(set.contains_range(1..6));
364        assert!(set.contains_range(2..4));
365        assert!(set.contains_range(1..6));
366    }
367
368    #[test]
369    fn test_contains_range_no() {
370        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
371        assert!(!set.contains_range(0..4));
372        assert!(!set.contains_range(3..5));
373        assert!(!set.contains_range(5..10));
374    }
375
376    #[test]
377    fn test_contains_range_empty() {
378        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
379        assert!(set.contains_range(5..5));
380        assert!(set.contains_range(0..0));
381    }
382
383    #[test]
384    fn test_contains_range_disjoint() {
385        let set = PositionSet::from_usize_iter(vec![10, 11, 12]);
386        assert!(!set.contains_range(0..5));
387        assert!(!set.contains_range(15..20));
388    }
389
390    #[test]
391    fn test_to_position_span_empty() {
392        let set = PositionSet::new();
393        let span = set.to_position_span();
394        assert!(span.is_empty());
395    }
396
397    #[test]
398    fn test_to_position_span_contiguous() {
399        let set = PositionSet::from_usize_iter(vec![5, 6, 7, 8]);
400        let span = set.to_position_span();
401        assert_eq!(span, PositionSpan::range(5, 9));
402    }
403
404    #[test]
405    fn test_to_position_span_single() {
406        let set = PositionSet::from_usize_iter(vec![10]);
407        let span = set.to_position_span();
408        assert_eq!(span, PositionSpan::range(10, 11));
409    }
410
411    #[test]
412    fn test_to_position_span_discrete() {
413        let set = PositionSet::from_usize_iter(vec![1, 3, 5, 7]);
414        let span = set.to_position_span();
415        assert_eq!(span, PositionSpan::from_positions(vec![1, 3, 5, 7]));
416    }
417
418    #[test]
419    fn test_to_position_span_two_with_gap() {
420        let set = PositionSet::from_usize_iter(vec![1, 3]);
421        let span = set.to_position_span();
422        assert_eq!(span, PositionSpan::from_positions(vec![1, 3]));
423    }
424
425    #[test]
426    fn test_min_max_pos() {
427        let set = PositionSet::from_usize_iter(vec![5, 10, 15]);
428        assert_eq!(set.min_pos(), 5);
429        assert_eq!(set.max_pos(), 15);
430    }
431
432    #[test]
433    fn test_min_max_pos_empty() {
434        let set = PositionSet::new();
435        assert_eq!(set.min_pos(), usize::MAX);
436        assert_eq!(set.max_pos(), 0);
437    }
438}