use std::cmp;
use crate::interval::Interval;
use crate::tree::{Node, NodeInfo, TreeBuilder};
use std::fmt;
use std::slice;
#[derive(Clone, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
struct Segment {
len: usize,
count: usize,
}
#[derive(Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Subset {
segments: Vec<Segment>,
}
#[derive(Default)]
pub struct SubsetBuilder {
segments: Vec<Segment>,
total_len: usize,
}
impl SubsetBuilder {
pub fn new() -> SubsetBuilder {
SubsetBuilder::default()
}
pub fn pad_to_len(&mut self, total_len: usize) {
if total_len > self.total_len {
let cur_len = self.total_len;
self.push_segment(total_len - cur_len, 0);
}
}
pub fn add_range(&mut self, begin: usize, end: usize, count: usize) {
assert!(begin >= self.total_len, "ranges must be added in non-decreasing order");
if begin >= end {
return;
}
let len = end - begin;
let cur_total_len = self.total_len;
if begin > self.total_len {
self.push_segment(begin - cur_total_len, 0);
}
self.push_segment(len, count);
}
pub fn push_segment(&mut self, len: usize, count: usize) {
assert!(len > 0, "can't push empty segment");
self.total_len += len;
if let Some(last) = self.segments.last_mut() {
if last.count == count {
last.len += len;
return;
}
}
self.segments.push(Segment { len, count });
}
pub fn build(self) -> Subset {
Subset { segments: self.segments }
}
}
#[derive(Clone, Copy, Debug)]
pub enum CountMatcher {
Zero,
NonZero,
All,
}
impl CountMatcher {
fn matches(self, seg: &Segment) -> bool {
match self {
CountMatcher::Zero => (seg.count == 0),
CountMatcher::NonZero => (seg.count != 0),
CountMatcher::All => true,
}
}
}
impl Subset {
pub fn new(len: usize) -> Subset {
let mut sb = SubsetBuilder::new();
sb.pad_to_len(len);
sb.build()
}
pub fn delete_from_string(&self, s: &str) -> String {
let mut result = String::new();
for (b, e) in self.range_iter(CountMatcher::Zero) {
result.push_str(&s[b..e]);
}
result
}
pub fn delete_from<N: NodeInfo>(&self, s: &Node<N>) -> Node<N> {
let mut b = TreeBuilder::new();
for (beg, end) in self.range_iter(CountMatcher::Zero) {
s.push_subseq(&mut b, Interval::new(beg, end));
}
b.build()
}
pub fn len_after_delete(&self) -> usize {
self.count(CountMatcher::Zero)
}
pub fn count(&self, matcher: CountMatcher) -> usize {
self.segments.iter().filter(|seg| matcher.matches(seg)).map(|seg| seg.len).sum()
}
pub fn len(&self) -> usize {
self.count(CountMatcher::All)
}
pub fn is_empty(&self) -> bool {
(self.segments.is_empty()) || ((self.segments.len() == 1) && (self.segments[0].count == 0))
}
pub fn union(&self, other: &Subset) -> Subset {
let mut sb = SubsetBuilder::new();
for zseg in self.zip(other) {
sb.push_segment(zseg.len, zseg.a_count + zseg.b_count);
}
sb.build()
}
pub fn subtract(&self, other: &Subset) -> Subset {
let mut sb = SubsetBuilder::new();
for zseg in self.zip(other) {
assert!(
zseg.a_count >= zseg.b_count,
"can't subtract {} from {}",
zseg.a_count,
zseg.b_count
);
sb.push_segment(zseg.len, zseg.a_count - zseg.b_count);
}
sb.build()
}
pub fn bitxor(&self, other: &Subset) -> Subset {
let mut sb = SubsetBuilder::new();
for zseg in self.zip(other) {
sb.push_segment(zseg.len, zseg.a_count ^ zseg.b_count);
}
sb.build()
}
fn transform(&self, other: &Subset, union: bool) -> Subset {
let mut sb = SubsetBuilder::new();
let mut seg_iter = self.segments.iter();
let mut cur_seg = Segment { len: 0, count: 0 };
for oseg in &other.segments {
if oseg.count > 0 {
sb.push_segment(oseg.len, if union { oseg.count } else { 0 });
} else {
let mut to_be_consumed = oseg.len;
while to_be_consumed > 0 {
if cur_seg.len == 0 {
cur_seg = seg_iter
.next()
.expect("self must cover all 0-regions of other")
.clone();
}
let to_consume = cmp::min(cur_seg.len, to_be_consumed);
sb.push_segment(to_consume, cur_seg.count);
to_be_consumed -= to_consume;
cur_seg.len -= to_consume;
}
}
}
assert_eq!(cur_seg.len, 0, "the 0-regions of other must be the size of self");
assert_eq!(seg_iter.next(), None, "the 0-regions of other must be the size of self");
sb.build()
}
pub fn transform_expand(&self, other: &Subset) -> Subset {
self.transform(other, false)
}
pub fn transform_union(&self, other: &Subset) -> Subset {
self.transform(other, true)
}
pub fn transform_shrink(&self, other: &Subset) -> Subset {
let mut sb = SubsetBuilder::new();
for zseg in self.zip(other) {
if zseg.b_count == 0 {
sb.push_segment(zseg.len, zseg.a_count);
}
}
sb.build()
}
pub fn range_iter(&self, matcher: CountMatcher) -> RangeIter {
RangeIter { seg_iter: self.segments.iter(), consumed: 0, matcher }
}
pub fn complement_iter(&self) -> RangeIter {
self.range_iter(CountMatcher::Zero)
}
pub fn zip<'a>(&'a self, other: &'a Subset) -> ZipIter<'a> {
ZipIter {
a_segs: self.segments.as_slice(),
b_segs: other.segments.as_slice(),
a_i: 0,
b_i: 0,
a_consumed: 0,
b_consumed: 0,
consumed: 0,
}
}
pub fn complement(&self) -> Subset {
let mut sb = SubsetBuilder::new();
for seg in &self.segments {
if seg.count == 0 {
sb.push_segment(seg.len, 1);
} else {
sb.push_segment(seg.len, 0);
}
}
sb.build()
}
pub fn mapper(&self, matcher: CountMatcher) -> Mapper {
Mapper {
range_iter: self.range_iter(matcher),
last_i: 0,
cur_range: (0, 0),
subset_amount_consumed: 0,
}
}
}
impl fmt::Debug for Subset {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if f.alternate() {
for s in &self.segments {
let chr = if s.count == 0 {
'-'
} else if s.count == 1 {
'#'
} else if s.count <= 9 {
((s.count as u8) + b'0') as char
} else {
'+'
};
for _ in 0..s.len {
write!(f, "{}", chr)?;
}
}
Ok(())
} else {
f.debug_tuple("Subset").field(&self.segments).finish()
}
}
}
pub struct RangeIter<'a> {
seg_iter: slice::Iter<'a, Segment>,
pub consumed: usize,
matcher: CountMatcher,
}
impl<'a> Iterator for RangeIter<'a> {
type Item = (usize, usize);
fn next(&mut self) -> Option<(usize, usize)> {
while let Some(seg) = self.seg_iter.next() {
self.consumed += seg.len;
if self.matcher.matches(seg) {
return Some((self.consumed - seg.len, self.consumed));
}
}
None
}
}
pub struct ZipIter<'a> {
a_segs: &'a [Segment],
b_segs: &'a [Segment],
a_i: usize,
b_i: usize,
a_consumed: usize,
b_consumed: usize,
pub consumed: usize,
}
#[derive(Clone, Debug)]
pub struct ZipSegment {
len: usize,
a_count: usize,
b_count: usize,
}
impl<'a> Iterator for ZipIter<'a> {
type Item = ZipSegment;
fn next(&mut self) -> Option<ZipSegment> {
match (self.a_segs.get(self.a_i), self.b_segs.get(self.b_i)) {
(None, None) => None,
(None, Some(_)) | (Some(_), None) => {
panic!("can't zip Subsets of different base lengths.")
}
(
Some(&Segment { len: a_len, count: a_count }),
Some(&Segment { len: b_len, count: b_count }),
) => {
let len = if a_len + self.a_consumed == b_len + self.b_consumed {
self.a_consumed += a_len;
self.a_i += 1;
self.b_consumed += b_len;
self.b_i += 1;
self.a_consumed - self.consumed
} else if a_len + self.a_consumed < b_len + self.b_consumed {
self.a_consumed += a_len;
self.a_i += 1;
self.a_consumed - self.consumed
} else {
self.b_consumed += b_len;
self.b_i += 1;
self.b_consumed - self.consumed
};
self.consumed += len;
Some(ZipSegment { len, a_count, b_count })
}
}
}
}
pub struct Mapper<'a> {
range_iter: RangeIter<'a>,
last_i: usize,
cur_range: (usize, usize),
pub subset_amount_consumed: usize,
}
impl<'a> Mapper<'a> {
pub fn doc_index_to_subset(&mut self, i: usize) -> usize {
assert!(
i >= self.last_i,
"method must be called with i in non-decreasing order. i={}<{}=last_i",
i,
self.last_i
);
self.last_i = i;
while i >= self.cur_range.1 {
self.subset_amount_consumed += self.cur_range.1 - self.cur_range.0;
self.cur_range = match self.range_iter.next() {
Some(range) => range,
None => {
self.cur_range = (usize::max_value(), usize::max_value());
return self.subset_amount_consumed;
}
}
}
if i >= self.cur_range.0 {
let dist_in_range = i - self.cur_range.0;
dist_in_range + self.subset_amount_consumed
} else {
self.subset_amount_consumed
}
}
}
#[cfg(test)]
mod tests {
use crate::multiset::*;
use crate::test_helpers::find_deletions;
const TEST_STR: &'static str = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
#[test]
fn test_apply() {
let mut sb = SubsetBuilder::new();
for &(b, e) in &[
(0, 1),
(2, 4),
(6, 11),
(13, 14),
(15, 18),
(19, 23),
(24, 26),
(31, 32),
(33, 35),
(36, 37),
(40, 44),
(45, 48),
(49, 51),
(52, 57),
(58, 59),
] {
sb.add_range(b, e, 1);
}
sb.pad_to_len(TEST_STR.len());
let s = sb.build();
println!("{:?}", s);
assert_eq!("145BCEINQRSTUWZbcdimpvxyz", s.delete_from_string(TEST_STR));
}
#[test]
fn trivial() {
let s = SubsetBuilder::new().build();
assert!(s.is_empty());
}
#[test]
fn test_find_deletions() {
let substr = "015ABDFHJOPQVYdfgloprsuvz";
let s = find_deletions(substr, TEST_STR);
assert_eq!(substr, s.delete_from_string(TEST_STR));
assert!(!s.is_empty())
}
#[test]
fn test_complement() {
let substr = "0456789DEFGHIJKLMNOPQRSTUVWXYZdefghijklmnopqrstuvw";
let s = find_deletions(substr, TEST_STR);
let c = s.complement();
assert_eq!("123ABCabcxyz", c.delete_from_string(TEST_STR));
}
#[test]
fn test_mapper() {
let substr = "469ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwz";
let s = find_deletions(substr, TEST_STR);
let mut m = s.mapper(CountMatcher::NonZero);
assert_eq!(0, m.doc_index_to_subset(0));
assert_eq!(2, m.doc_index_to_subset(2));
assert_eq!(2, m.doc_index_to_subset(2));
assert_eq!(3, m.doc_index_to_subset(3));
assert_eq!(4, m.doc_index_to_subset(4));
assert_eq!(4, m.doc_index_to_subset(5));
assert_eq!(5, m.doc_index_to_subset(7));
assert_eq!(6, m.doc_index_to_subset(8));
assert_eq!(6, m.doc_index_to_subset(8));
assert_eq!(8, m.doc_index_to_subset(60));
assert_eq!(9, m.doc_index_to_subset(61));
assert_eq!(9, m.doc_index_to_subset(62));
}
#[test]
#[should_panic(expected = "non-decreasing")]
fn test_mapper_requires_non_decreasing() {
let substr = "469ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvw";
let s = find_deletions(substr, TEST_STR);
let mut m = s.mapper(CountMatcher::NonZero);
m.doc_index_to_subset(0);
m.doc_index_to_subset(2);
m.doc_index_to_subset(1);
}
#[test]
fn union() {
let s1 = find_deletions("024AEGHJKNQTUWXYZabcfgikqrvy", TEST_STR);
let s2 = find_deletions("14589DEFGIKMOPQRUXZabcdefglnpsuxyz", TEST_STR);
assert_eq!("4EGKQUXZabcfgy", s1.union(&s2).delete_from_string(TEST_STR));
}
fn transform_case(str1: &str, str2: &str, result: &str) {
let s1 = find_deletions(str1, TEST_STR);
let s2 = find_deletions(str2, str1);
let s3 = s2.transform_expand(&s1);
let str3 = s3.delete_from_string(TEST_STR);
assert_eq!(result, str3);
assert_eq!(str2, s1.transform_shrink(&s3).delete_from_string(&str3));
assert_eq!(str2, s2.transform_union(&s1).delete_from_string(TEST_STR));
}
#[test]
fn transform() {
transform_case(
"02345678BCDFGHKLNOPQRTUVXZbcefghjlmnopqrstwx",
"027CDGKLOTUbcegopqrw",
"01279ACDEGIJKLMOSTUWYabcdegikopqruvwyz",
);
transform_case(
"01234678DHIKLMNOPQRUWZbcdhjostvy",
"136KLPQZvy",
"13569ABCEFGJKLPQSTVXYZaefgiklmnpqruvwxyz",
);
transform_case(
"0125789BDEFIJKLMNPVXabdjmrstuwy",
"12BIJVXjmrstu",
"12346ABCGHIJOQRSTUVWXYZcefghijklmnopqrstuvxz",
);
transform_case(
"12456789ABCEFGJKLMNPQRSTUVXYadefghkrtwxz",
"15ACEFGKLPRUVYdhrtx",
"0135ACDEFGHIKLOPRUVWYZbcdhijlmnopqrstuvxy",
);
transform_case(
"0128ABCDEFGIJMNOPQXYZabcfgijkloqruvy",
"2CEFGMZabijloruvy",
"2345679CEFGHKLMRSTUVWZabdehijlmnoprstuvwxyz",
);
transform_case(
"01245689ABCDGJKLMPQSTWXYbcdfgjlmnosvy",
"01245ABCDJLQSWXYgsv",
"0123457ABCDEFHIJLNOQRSUVWXYZaeghikpqrstuvwxz",
);
}
}