compacts 0.9.0

compact data structures
Documentation
#![cfg_attr(feature = "cargo-clippy", allow(needless_pass_by_value))]
#![cfg_attr(feature = "cargo-clippy", allow(range_minus_one))]
#![cfg_attr(feature = "cargo-clippy", allow(redundant_field_names))]

use bits::repr::{Arr, Run};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use std::iter::FromIterator;
use std::{cmp, io, ops, u16};

impl Run {
    pub fn new() -> Self {
        Run::default()
    }

    pub fn search(&self, x: &u16) -> Result<usize, usize> {
        let n = *x;
        self.ranges.binary_search_by(|range| {
            if range.start <= n && n <= range.end {
                cmp::Ordering::Equal
            } else if n < range.start {
                cmp::Ordering::Greater
            } else if range.end < n {
                cmp::Ordering::Less
            } else {
                unreachable!()
            }
        })
    }

    #[inline]
    fn index_to_insert(&self, x: &u16) -> Option<usize> {
        self.search(x).err()
    }

    #[inline]
    fn index_to_remove(&self, x: &u16) -> Option<usize> {
        self.search(x).ok()
    }

    #[inline]
    pub fn contains(&self, x: u16) -> bool {
        self.search(&x).is_ok()
    }

    pub fn insert(&mut self, x: u16) -> bool {
        let mut inserted = false;
        if let Some(pos) = self.index_to_insert(&x) {
            self.weight += 1;
            inserted = true;

            let lhs_bound = if pos != 0 {
                Some(self.ranges[pos - 1].end)
            } else {
                None
            };
            let rhs_bound = if pos < self.ranges.len() {
                Some(self.ranges[pos].start)
            } else {
                None
            };

            match (lhs_bound, rhs_bound) {
                // connect lhs and rhs
                (Some(lhs), Some(rhs)) if lhs + 1 == x && x == rhs - 1 => {
                    let start = self.ranges[pos - 1].start;
                    let end = self.ranges[pos].end;
                    self.ranges[pos - 1] = start..=end;
                    self.ranges.remove(pos);
                }
                // extend lhs
                (Some(lhs), None) if lhs + 1 == x => {
                    let start = self.ranges[pos - 1].start;
                    let end = self.ranges[pos - 1].end + 1;
                    self.ranges[pos - 1] = start..=end;
                }
                // extend rhs
                (None, Some(rhs)) if x == rhs - 1 => {
                    let start = self.ranges[pos].start - 1;
                    let end = self.ranges[pos].end;
                    self.ranges[pos] = start..=end;
                }
                _ => {
                    self.ranges.insert(pos, x..=x);
                }
            }
        }
        !inserted
    }

    pub fn remove(&mut self, x: u16) -> bool {
        let mut removed = false;
        if let Some(pos) = self.index_to_remove(&x) {
            self.weight -= 1;
            removed = true;

            match (self.ranges[pos].start, self.ranges[pos].end) {
                (i, j) if i == j => {
                    self.ranges.remove(pos);
                }
                (i, j) if i < x && x < j => {
                    self.ranges[pos] = i..=(x - 1);
                    self.ranges.insert(pos + 1, (x + 1)..=j);
                }
                (i, j) if i == x => {
                    assert!(i + 1 <= j);
                    self.ranges[pos] = (i + 1)..=j;
                }
                (i, j) if j == x => {
                    assert!(i <= j - 1);
                    self.ranges[pos] = i..=(j - 1);
                }
                _ => unreachable!(),
            };
        }
        removed
    }
}

// `Run` is serialized as a 16-bit integer indicating the number of runs,
// followed by a pair of 16-bit values for each run.
// Runs are non-overlapping and sorted.
// For example, the values `[11,12,13,14,15]` will be encoded to `11,4`
// where 4 means that beyond 11 itself.
//
// Example:
//    `[(1,3),(20,0),(31,2)]` => `[1, 2, 3, 4, 20, 31, 32, 33]`

impl Run {
    pub fn write_to<W>(&self, w: &mut W) -> io::Result<()>
    where
        W: io::Write,
    {
        w.write_u16::<LittleEndian>(self.ranges.len() as u16)?;
        for rg in &self.ranges {
            w.write_u16::<LittleEndian>(rg.start)?;
            w.write_u16::<LittleEndian>(rg.end - rg.start)?;
        }
        Ok(())
    }

    // Resize automatically.
    pub fn read_from<R>(r: &mut R) -> io::Result<Self>
    where
        R: io::Read,
    {
        let runs = r.read_u16::<LittleEndian>()?;

        let mut weight = 0;
        let mut ranges = vec![0..=0; runs as usize];

        for rg in &mut ranges {
            let s = r.read_u16::<LittleEndian>()?;
            let o = r.read_u16::<LittleEndian>()?;
            *rg = s..=(s + o);
            weight += u32::from(o) + 1;
        }

        Ok(Run { weight, ranges })
    }
}

impl<'a> From<&'a Arr> for Run {
    fn from(arr: &'a Arr) -> Self {
        let mut run = Run::new();
        for (i, &bit) in arr.bitset.iter().enumerate().filter(|&(_, &v)| v != 0) {
            for p in 0..64 {
                if bit & (1 << p) != 0 {
                    let x = (i as u16 * 64) + p;
                    run.insert(x);
                }
            }
        }
        run
    }
}

impl<'a> From<&'a [::std::ops::RangeInclusive<u16>]> for Run {
    fn from(slice: &'a [::std::ops::RangeInclusive<u16>]) -> Self {
        let mut rle16 = Run {
            weight: 0,
            ranges: Vec::with_capacity(slice.len()),
        };
        for r in slice {
            let w = u32::from(r.end - r.start) + 1;
            rle16.weight += w;
            rle16.ranges.push(r.start..=r.end);
        }
        rle16
    }
}

impl<'a> FromIterator<&'a u16> for Run {
    fn from_iter<I>(iterable: I) -> Self
    where
        I: IntoIterator<Item = &'a u16>,
    {
        let mut run = Run::new();
        for bit in iterable {
            run.insert(*bit);
        }
        run
    }
}

impl FromIterator<ops::Range<u32>> for Run {
    fn from_iter<I>(iterable: I) -> Self
    where
        I: IntoIterator<Item = ops::Range<u32>>,
    {
        let mut weight = 0;
        let mut ranges = Vec::new();
        for curr in iterable {
            assert!(curr.start < curr.end);
            weight += curr.end - curr.start;

            let start = curr.start as u16;
            let end = (curr.end - 1) as u16;

            if ranges.is_empty() {
                ranges.push(start..=end);
                continue;
            }

            let i = ranges.len() - 1;
            assert!(ranges[i].end <= start); // no overlap

            if start == (ranges[i].end + 1) {
                // merge into a previous range
                ranges[i] = ranges[i].start..=end;
            } else {
                ranges.push(start..=end);
            }
        }
        Self { weight, ranges }
    }
}