Skip to main content

ruranges_core/
split.rs

1use crate::{
2    ruranges_structs::{GroupType, PositionType},
3    sorts,
4};
5
6pub fn sweep_line_split<G: GroupType, T: PositionType>(
7    chrs: &[G],
8    starts: &[T],
9    ends: &[T],
10    slack: T,
11    between: bool,
12) -> (Vec<u32>, Vec<T>, Vec<T>) {
13    let events = sorts::build_sorted_events_single_collection(chrs, starts, ends, slack);
14
15    // These will hold the output arrays: each emitted subinterval’s
16    // (original_idx, start, end).
17    let mut idxs_out = Vec::new();
18    let mut starts_out = Vec::new();
19    let mut ends_out = Vec::new();
20
21    // Edge case: no intervals
22    if events.is_empty() {
23        return (idxs_out, starts_out, ends_out);
24    }
25
26    // State for the sweep line
27    let mut current_chr = events[0].chr;
28    // We initialize coverage to 0, then we will “process” each event,
29    // but we need a “last_pos” to track from where we last emitted.
30    let mut active_count: u32 = 0;
31    let mut last_pos = events[0].pos;
32    let mut last_idx = events[0].idx; // you can store whichever index you like
33
34    // Decide whether coverage is “on” at the very first position:
35    // If the first event is a start, coverage goes from 0 → 1 at that point.
36    if events[0].is_start {
37        active_count = 1;
38    }
39
40    // We iterate from the *second* event onward.
41    // At each new event, we emit from last_pos → e.pos if either coverage was > 0 or `between = true`.
42    for e_i in 1..events.len() {
43        let e = &events[e_i];
44
45        // If chromosome changes, we “jump” to a new chromosome
46        // and do *not* produce an interval bridging old->new.
47        if e.chr != current_chr {
48            // reset
49            current_chr = e.chr;
50            active_count = if e.is_start { 1 } else { 0 };
51            last_pos = e.pos;
52            last_idx = e.idx;
53            continue;
54        }
55
56        // same chromosome => we may emit from last_pos..e.pos if it's > 0 length
57        // and either coverage>0 or we want the gap (between = true).
58        if e.pos > last_pos {
59            // If we were in coverage or want gaps, emit the subinterval.
60            if active_count > 0 || between {
61                idxs_out.push(last_idx);
62                starts_out.push(last_pos);
63                ends_out.push(e.pos);
64            }
65            last_pos = e.pos;
66            last_idx = e.idx; // you might prefer to keep the same idx as “first covering interval”
67        }
68
69        // Now handle the event itself (this flips coverage up or down).
70        if e.is_start {
71            active_count += 1;
72        } else {
73            // is an end
74            if active_count > 0 {
75                active_count -= 1;
76            }
77        }
78    }
79
80    (idxs_out, starts_out, ends_out)
81}