use std::ops::Range;
use bitvec::order::Lsb0;
use simple_sds_sbwt::bit_vector::*;
use simple_sds_sbwt::raw_vector::*;
use simple_sds_sbwt::ops::*;
use simple_sds_sbwt::serialize::*;
use crate::util::bitvec_to_simple_sds_raw_bitvec;
#[allow(clippy::len_without_is_empty)]
pub trait SubsetSeq{
fn new(subset_seq: Vec<Vec<u8>>, sigma: usize) -> Self;
fn new_from_bit_vectors(vecs: Vec<bitvec::vec::BitVec::<u64, Lsb0>>) -> Self;
fn len(&self) -> usize;
fn build_rank(&mut self);
fn build_select(&mut self);
fn has_select_support(&self) -> bool;
fn has_rank_support(&self) -> bool;
fn rank(&self, c: u8, i: usize) -> usize;
fn select(&self, c: u8, i: usize) -> Option<usize>;
fn append_set_to_buf(&self, i: usize, buf: &mut Vec<u8>);
fn access(&self, i: usize) -> Vec<u8>{
let mut v = Vec::new();
self.append_set_to_buf(i, &mut v);
v
}
fn subset_size(&self, i: usize) -> usize;
fn set_contains(&self, set_idx: usize, character: u8) -> bool;
fn next_set_with_char(&self, set_idx: usize, c: u8) -> Option<usize>;
fn serialize<W: std::io::Write>(&self, out: &mut W) -> std::io::Result<usize>;
fn load<R: std::io::Read>(input: &mut R) -> std::io::Result<Self> where Self: Sized;
fn call_on_char_occurrences<F: FnMut(usize)>(&self, range: Range<usize>, c: u8, callback: F);
fn get_C_array(&self) -> Vec<usize> {
let sigma = 4; let n = self.len();
let mut C: Vec<usize> = vec![0; sigma];
for i in 0..n {
for c in 0..(sigma as u8) {
if self.set_contains(i, c) {
for d in (c + 1)..(sigma as u8) {
C[d as usize] += 1;
}
}
}
}
#[allow(clippy::needless_range_loop)] for c in 0..sigma {
C[c] += 1;
}
C
}
fn push_labels_forward(&self, labels: &[u8], sbwt_input_range: std::ops::Range<usize>, output_ranges: Vec<&mut[u8]>);
}
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct SubsetMatrix{
rows: Vec<BitVector>
}
impl SubsetSeq for SubsetMatrix{
fn serialize<W: std::io::Write>(&self, out: &mut W) -> std::io::Result<usize>{
let mut n_written = 0_usize;
let n_rows = self.rows.len() as u64;
out.write_all(&n_rows.to_le_bytes())?;
for row in self.rows.iter(){
row.serialize(out)?;
n_written += row.size_in_bytes();
}
Ok(n_written)
}
fn load<R: std::io::Read>(input: &mut R) -> std::io::Result<Self>{
let n_rows = u64::load(input)? as usize;
let mut rows = Vec::<BitVector>::new();
for _ in 0..n_rows{
rows.push(BitVector::load(input)?);
}
Ok(Self{rows})
}
fn new_from_bit_vectors(rows: Vec<bitvec::vec::BitVec<u64, Lsb0>>) -> Self{
Self{rows: rows.into_iter().map(|row|
simple_sds_sbwt::bit_vector::BitVector::from(
bitvec_to_simple_sds_raw_bitvec(row))
).collect()
}
}
fn new(subset_seq: Vec<Vec<u8>>, sigma: usize) -> Self{
let n = subset_seq.len();
let mut rawrows = Vec::<RawVector>::new();
for _ in 0..sigma{
rawrows.push(RawVector::with_len(n, false));
}
for (i, set) in subset_seq.iter().enumerate(){
for c in set.iter(){
rawrows[*c as usize].set_bit(i, true)
}
}
let rows: Vec<BitVector> = rawrows.into_iter().map(BitVector::from).collect();
Self{rows}
}
fn build_rank(&mut self) {
for row in self.rows.iter_mut(){
row.enable_rank();
}
}
fn build_select(&mut self) {
for row in self.rows.iter_mut(){
row.enable_select();
}
}
fn has_select_support(&self) -> bool {
self.rows[0].supports_select()
}
fn has_rank_support(&self) -> bool {
self.rows[0].supports_rank()
}
fn rank(&self, c: u8, i: usize) -> usize{
self.rows[c as usize].rank(i)
}
fn select(&self, c: u8, i: usize) -> Option<usize>{
assert!(self.rows[c as usize].supports_select());
self.rows[c as usize].select(i)
}
fn len(&self) -> usize{
if self.rows.is_empty() { 0 }
else { self.rows[0].len() }
}
fn subset_size(&self, i: usize) -> usize {
self.rows.iter().filter(|&row| row.get(i)).count()
}
fn set_contains(&self, set_idx: usize, character: u8) -> bool {
self.rows[character as usize].get(set_idx)
}
fn append_set_to_buf(&self, i: usize, buf: &mut Vec<u8>){
for c in 0..self.rows.len(){
if self.rows[c].get(i){
buf.push(c as u8);
}
}
}
fn next_set_with_char(&self, set_idx: usize, c: u8) -> Option<usize> {
let bv = bitvec::slice::BitSlice::<u64, Lsb0>::from_slice(self.rows[c as usize].as_ref().as_ref());
let off = bv[set_idx..].first_one()?;
Some(set_idx + off)
}
fn call_on_char_occurrences<F: FnMut(usize)>(&self, range: Range<usize>, c: u8, mut callback: F) {
let bv = bitvec::slice::BitSlice::<u64, Lsb0>::from_slice(self.rows[c as usize].as_ref().as_ref());
let slice = &bv[range.clone()];
for i in slice.iter_ones() {
callback(range.start + i);
}
}
fn push_labels_forward(&self, labels: &[u8], sbwt_input_range: std::ops::Range<usize>, output_ranges: Vec<&mut[u8]>) {
assert_eq!(labels.len(), sbwt_input_range.len());
assert_eq!(output_ranges.len(), self.rows.len());
if sbwt_input_range.is_empty() { return }
#[allow(clippy::needless_range_loop)]
for (char_idx, output_slice) in output_ranges.into_iter().enumerate() {
let mut output_offset = 0_usize;
let words = self.rows[char_idx].as_ref().get_words();
crate::util::for_each_one_bit(words, sbwt_input_range.clone(), |i| {
output_slice[output_offset] = labels[i - sbwt_input_range.start];
output_offset += 1;
});
}
}
}
impl std::fmt::Display for SubsetMatrix{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for row in self.rows.iter(){
for i in 0..row.len(){
if row.get(i){
write!(f, "1")?;
} else{
write!(f, "0")?;
}
}
writeln!(f)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn serialize_and_load(){
let sets: Vec<Vec<u8>> = vec![vec![1,2,3], vec![0,2], vec![0,1,3,4], vec![], vec![0,1,2]];
let mut sm = SubsetMatrix::new(sets, 5);
sm.build_rank();
let mut buf = Vec::<u8>::new();
sm.serialize(&mut buf).unwrap();
let sm2 = SubsetMatrix::load(&mut buf.as_slice()).unwrap();
assert_eq!(sm, sm2);
}
#[test]
fn next_set_with_char(){
let sets: Vec<Vec<u8>> = vec![vec![1,2,3], vec![0,2], vec![0,1,3,4], vec![], vec![0,1,2]];
let sm = SubsetMatrix::new(sets, 5);
assert_eq!(sm.next_set_with_char(0,0), Some(1));
assert_eq!(sm.next_set_with_char(0,1), Some(0));
assert_eq!(sm.next_set_with_char(0,2), Some(0));
assert_eq!(sm.next_set_with_char(0,3), Some(0));
assert_eq!(sm.next_set_with_char(0,4), Some(2));
assert_eq!(sm.next_set_with_char(3,0), Some(4));
assert_eq!(sm.next_set_with_char(3,1), Some(4));
assert_eq!(sm.next_set_with_char(3,2), Some(4));
assert_eq!(sm.next_set_with_char(3,3), None);
assert_eq!(sm.next_set_with_char(3,4), None);
assert_eq!(sm.next_set_with_char(5,0), None); }
}