use std::collections::VecDeque;
use std::convert::TryFrom;
use std::ops::Range;
use itertools::Itertools;
pub trait Interval {
type Coord: Ord;
fn start(&self) -> &Self::Coord;
fn end(&self) -> &Self::Coord;
}
impl<N> Interval for Range<N> where N: Ord{
type Coord = N;
#[inline(always)]
fn start(&self) -> &Self::Coord {
&self.start
}
#[inline(always)]
fn end(&self) -> &Self::Coord {
&self.end
}
}
#[derive(Debug)]
pub struct NClist<T> where T: Interval {
intervals: Vec<T>,
contained: Vec<Option<(usize, usize)>>
}
struct SlicedNClist<'a, T> where T: 'a + Interval {
intervals: &'a [T],
contained: &'a [Option<(usize, usize)>],
stop_at: &'a T::Coord
}
pub struct Overlaps<'a, T> where T: 'a + Interval {
nclist: &'a NClist<T>,
range: &'a Range<T::Coord>,
current_pos: usize,
current_end: usize,
sublists: VecDeque<(usize, usize)>,
}
pub struct OrderedOverlaps<'a, T> where T: 'a + Interval {
nclist: &'a NClist<T>,
range: &'a Range<T::Coord>,
current: SlicedNClist<'a, T>,
queue: Vec<SlicedNClist<'a, T>>
}
impl<T> NClist<T> where T: Interval {
fn new() -> NClist<T> {
NClist { intervals: Vec::new(), contained: vec![Some((0,0))] }
}
pub fn from_vec(mut v: Vec<T>) -> Result<NClist<T>, &'static str> {
if v.iter().any(|e| e.end() <= e.start()) {
return Err("Cannot use intervals with zero or negative width");
}
v.sort_by(|a, b| a.start().cmp(b.start())
.then(a.end().cmp(b.end()).reverse()));
let mut list = NClist::new();
let mut sublists = VecDeque::from(vec![NClistBuilder { intervals: v, contained_pos: 0}]);
while !sublists.is_empty() {
build_nclist(&mut sublists, &mut list);
}
Ok(list)
}
pub fn count_overlaps(&self, r: &Range<T::Coord>) -> usize {
if r.end <= r.start {
return 0;
}
let mut count = 0;
let mut queue = VecDeque::new();
queue.push_back(self.contained[0].unwrap());
while let Some((start, end)) = queue.pop_front() {
self.slice(start, end, &r.start, &r.end)
.for_each(|(_, contained)| {
count += 1;
if let Some(subrange) = *contained {
queue.push_back(subrange);
}
});
}
count
}
pub fn overlaps<'a>(&'a self, r: &'a Range<T::Coord>) -> Overlaps<'a , T> {
let current_slice = self.contained[0].as_ref().unwrap();
let start = if r.end > r.start {
self.bin_search_end(current_slice.0, current_slice.1, &r.start)
} else {
current_slice.1
};
Overlaps { nclist: self, range: r, current_pos: start, current_end: current_slice.1, sublists: VecDeque::new() }
}
pub fn overlaps_ordered<'a>(&'a self, r: &'a Range<T::Coord>) -> OrderedOverlaps<'a , T> {
let &(mut start, end) = self.contained[0].as_ref().unwrap();
if r.end <= r.start {
start = end;
}
OrderedOverlaps { nclist: self, range: r, current: self.slice(start, end, &r.start, &r.end), queue: Vec::new() }
}
pub fn into_vec(self) -> Vec<T> {
self.into()
}
#[inline]
fn slice<'a>(&'a self, mut start: usize, end: usize, q: &T::Coord, q_end: &'a T::Coord) -> SlicedNClist<'a, T> {
start += match self.intervals[start..end].binary_search_by(|e| e.end().cmp(q))
{
Ok(n) => n + 1,
Err(n) => n
};
SlicedNClist { intervals: &self.intervals[start..end], contained: &self.contained[start+1..end+1], stop_at: q_end }
}
#[inline]
fn bin_search_end(&self, start: usize, end: usize, q: &T::Coord) -> usize {
match self.intervals[start..end].binary_search_by(|e| e.end().cmp(q)) {
Ok(n) => n + 1,
Err(n) => n
}
}
}
impl<'a, T> Iterator for SlicedNClist<'a, T> where T: Interval {
type Item = (&'a T, &'a Option<(usize, usize)>);
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if let Some((i, ref mut intervals)) = self.intervals.split_first() {
if i.start() >= self.stop_at {
None
} else {
let (c, ref mut contained) = self.contained.split_first().unwrap();
std::mem::replace(&mut self.intervals, intervals);
std::mem::replace(&mut self.contained, contained);
Some((i, c))
}
} else {
None
}
}
}
impl<'a, T> Iterator for Overlaps<'a, T> where T: Interval {
type Item = &'a T;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
let remaining = self.current_end - self.current_pos;
if remaining == 0 || *self.nclist.intervals[self.current_pos].start() >= self.range.end {
if let Some((mut new_start, new_end)) = self.sublists.pop_front() {
new_start += self.nclist.bin_search_end(new_start, new_end, &self.range.start);
self.current_pos = new_start;
self.current_end = new_end;
self.next()
} else {
None
}
} else {
let pos = self.current_pos;
self.current_pos += 1;
if let Some(next_sublist) = self.nclist.contained[self.current_pos] {
self.sublists.push_back(next_sublist);
}
Some(&self.nclist.intervals[pos])
}
}
}
impl<'a, T> Iterator for OrderedOverlaps<'a, T> where T: Interval {
type Item = &'a T;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if let Some((interval, contained)) = self.current.next() {
if let Some((start, end)) = *contained {
let mut ns = self.nclist.slice(start, end, &self.range.start, &self.range.end);
std::mem::swap(&mut self.current, &mut ns);
self.queue.push(ns);
}
Some(interval)
} else if let Some(sublist) = self.queue.pop() {
self.current = sublist;
self.next()
} else {
None
}
}
}
struct NClistBuilder<T> {
intervals: Vec<T>,
contained_pos: usize,
}
fn build_nclist<T: Interval>(sublists: &mut VecDeque<NClistBuilder<T>>, result: &mut NClist<T>) {
if let Some(sublist) = sublists.pop_front() {
let mut it = sublist.intervals.into_iter().peekable();
let sublist_start = result.intervals.len();
while let Some(e) = it.next() {
let interval_pos = result.intervals.len();
let contained: Vec<_> = it.peeking_take_while(|n| n.end() < e.end()).collect();
if !contained.is_empty() {
sublists.push_back(NClistBuilder {intervals: contained, contained_pos: interval_pos + 1});
}
result.intervals.push(e);
result.contained.push(None);
}
result.contained[sublist.contained_pos] = Some((sublist_start, result.intervals.len()));
}
}
impl<T> TryFrom<Vec<T>> for NClist<T> where T: Interval {
type Error = &'static str;
fn try_from(v: Vec<T>) -> Result<Self, Self::Error> {
NClist::from_vec(v)
}
}
impl<T> Into<Vec<T>> for NClist<T> where T: Interval {
fn into(self) -> Vec<T> {
self.intervals
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_binary_search_implementation_details() {
let b = [1, 1, 2, 2, 3, 3, 3];
assert_eq!(b.binary_search(&1), Ok(1));
assert_eq!(b.binary_search(&2), Ok(3));
assert_eq!(b.binary_search(&3), Ok(6));
let b = [1, 1, 1, 1, 1, 3, 3, 3, 3];
assert_eq!(b.binary_search(&1), Ok(4));
assert_eq!(b.binary_search(&3), Ok(8));
let b = [1, 1, 1, 1, 3, 3, 3, 3, 3];
assert_eq!(b.binary_search(&1), Ok(3));
assert_eq!(b.binary_search(&3), Ok(8));
}
#[test]
fn from_vec() {
let list: Vec<Range<u64>> = vec![(10..15), (10..20), (1..8)].into_iter().collect();
let nclist = NClist::from_vec(list).unwrap();
assert_eq!(nclist.intervals.len(), 3);
assert!(nclist.contained[0].is_some());
assert!(nclist.contained[1].is_none());
assert!(nclist.contained[2].is_some());
assert!(nclist.contained[3].is_none());
let list: Vec<Range<u64>> = Vec::new();
let nclist = NClist::from_vec(list).unwrap();
assert_eq!(nclist.intervals.len(), 0);
}
#[test]
fn interval_width() {
let list: Vec<Range<u64>> = vec![(5..20), (19..20), (7..7)].into_iter().collect();
assert!(NClist::from_vec(list).is_err());
let list: Vec<Range<u64>> = vec![(5..20), (20..19), (7..8)].into_iter().collect();
assert!(NClist::from_vec(list).is_err());
}
#[test]
fn illegal_width_queries() {
let list: Vec<Range<u64>> = vec![(5..20), (19..20), (7..8)].into_iter().collect();
let nclist = NClist::from_vec(list).unwrap();
assert_eq!(nclist.count_overlaps(&(7..7)), 0);
assert_eq!(nclist.count_overlaps(&(8..7)), 0);
assert_eq!(nclist.count_overlaps(&(19..19)), 0);
assert_eq!(nclist.overlaps(&(7..7)).count(), 0);
assert_eq!(nclist.overlaps(&(8..7)).count(), 0);
assert_eq!(nclist.overlaps(&(19..19)).count(), 0);
assert_eq!(nclist.overlaps_ordered(&(7..7)).count(), 0);
assert_eq!(nclist.overlaps_ordered(&(8..7)).count(), 0);
assert_eq!(nclist.overlaps_ordered(&(19..19)).count(), 0);
}
#[test]
fn count() {
let list: Vec<Range<u64>> = vec![(10..15), (10..20), (1..8)].into_iter().collect();
let nclist = NClist::from_vec(list).unwrap();
assert_eq!(nclist.intervals.len(), 3);
assert_eq!(nclist.count_overlaps(&(5..20)), 3);
assert_eq!(nclist.count_overlaps(&(14..18)), 2);
assert_eq!(nclist.count_overlaps(&(150..180)), 0);
assert_eq!(nclist.count_overlaps(&(10..10)), 0);
assert_eq!(nclist.count_overlaps(&(10..11)), 2);
assert_eq!(nclist.count_overlaps(&(9..10)), 0);
assert_eq!(nclist.count_overlaps(&(8..9)), 0);
assert_eq!(nclist.count_overlaps(&(8..10)), 0);
assert_eq!(nclist.count_overlaps(&(20..100)), 0);
let list: Vec<Range<u64>> = Vec::new();
let nclist = NClist::from_vec(list).unwrap();
assert_eq!(nclist.count_overlaps(&(100..200)), 0);
}
#[test]
fn overlaps() {
let list: Vec<Range<u64>> = vec![(10..15), (10..20), (1..8)].into_iter().collect();
let nclist = NClist::from_vec(list).unwrap();
assert_eq!(nclist.intervals.len(), 3);
assert_eq!(nclist.overlaps(&(5..20)).count(), 3);
let mut q = nclist.overlaps_ordered(&(5..20));
assert_eq!(q.next(), Some(&(1..8)));
assert_eq!(q.next(), Some(&(10..20)));
assert_eq!(q.next(), Some(&(10..15)));
assert_eq!(q.next(), None);
assert_eq!(nclist.overlaps(&(20..100)).count(), 0);
assert_eq!(nclist.overlaps(&(8..10)).count(), 0);
assert_eq!(nclist.overlaps(&(8..9)).count(), 0);
}
#[test]
fn duplicate_intervals() {
let list: Vec<Range<u64>> = vec![(10..15), (11..13), (10..20), (1..8), (11..13), (16..18)].into_iter().collect();
let nclist = NClist::from_vec(list).unwrap();
println!("{:?}", nclist);
assert_eq!(nclist.overlaps(&(5..20)).count(), 6);
assert_eq!(nclist.overlaps(&(11..13)).count(), 4);
let mut q = nclist.overlaps_ordered(&(11..17));
assert_eq!(q.next(), Some(&(10..20)));
assert_eq!(q.next(), Some(&(10..15)));
assert_eq!(q.next(), Some(&(11..13)));
assert_eq!(q.next(), Some(&(11..13)));
assert_eq!(q.next(), Some(&(16..18)));
assert_eq!(q.next(), None);
assert_eq!(nclist.overlaps(&(20..100)).count(), 0);
assert_eq!(nclist.overlaps(&(8..10)).count(), 0);
assert_eq!(nclist.overlaps(&(8..9)).count(), 0);
}
}