#![allow(clippy::type_complexity)]
#[cfg(test)]
extern crate quickcheck;
#[cfg(test)]
#[macro_use(quickcheck)]
extern crate quickcheck_macros;
use fmt::{Display, Write};
use fnv::FnvHashMap;
use libipld::{
cbor::{DagCbor, DagCborCodec},
codec::{Decode, Encode},
};
use std::{convert::TryFrom, fmt, hash::Hash, iter::FromIterator, mem::swap, usize};
mod bitmap;
mod util;
use bitmap::*;
use vec_collections::AbstractVecSet;
#[cfg(test)]
mod arb;
#[cfg(test)]
mod size_tests;
pub trait Tag: Hash + Ord + Clone + 'static {}
impl<T: Hash + Ord + Clone + 'static> Tag for T {}
#[cfg(test)]
pub type TagSet<T> = vec_collections::VecSet<[T; 4]>;
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DnfQuery<T> {
tags: Vec<T>,
sets: Bitmap,
}
impl<T: DagCbor> Encode<DagCborCodec> for DnfQuery<T> {
fn encode<W: std::io::Write>(&self, c: DagCborCodec, w: &mut W) -> anyhow::Result<()> {
w.write_all(&[0x82])?;
self.tags.encode(c, w)?;
self.sets.encode(c, w)?;
Ok(())
}
}
impl<T: DagCbor> Decode<DagCborCodec> for DnfQuery<T> {
fn decode<R: std::io::Read + std::io::Seek>(
c: DagCborCodec,
r: &mut R,
) -> anyhow::Result<Self> {
let (tags, sets) = <(Vec<T>, Bitmap)>::decode(c, r)?;
anyhow::ensure!((sets.columns() as usize) <= tags.len());
Ok(Self { tags, sets })
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagIndex<T> {
tags: DnfQuery<T>,
events: Vec<u32>,
}
impl<T> Default for TagIndex<T> {
fn default() -> Self {
Self {
tags: DnfQuery::empty(),
events: Default::default(),
}
}
}
impl<T: DagCbor> Encode<DagCborCodec> for TagIndex<T> {
fn encode<W: std::io::Write>(&self, c: DagCborCodec, w: &mut W) -> anyhow::Result<()> {
w.write_all(&[0x82])?;
self.tags.encode(c, w)?;
self.events.encode(c, w)?;
Ok(())
}
}
impl<T: DagCbor> Decode<DagCborCodec> for TagIndex<T> {
fn decode<R: std::io::Read + std::io::Seek>(
c: DagCborCodec,
r: &mut R,
) -> anyhow::Result<Self> {
let (tags, events) = <(DnfQuery<T>, Vec<u32>)>::decode(c, r)?;
anyhow::ensure!(events.iter().all(|x| (*x as usize) < tags.len()));
Ok(Self { tags, events })
}
}
impl<T> DnfQuery<T> {
pub fn empty() -> Self {
Self {
tags: Default::default(),
sets: Default::default(),
}
}
pub fn is_empty(&self) -> bool {
self.sets.rows() == 0
}
pub fn all() -> Self {
Self {
tags: Default::default(),
sets: Bitmap::new(vec![vec![]]),
}
}
pub fn is_all(&self) -> bool {
self.sets.iter().any(|row| row.count() == 0)
}
pub fn terms(&self) -> impl Iterator<Item = impl Iterator<Item = &T>> {
self.sets
.iter()
.map(move |rows| rows.map(move |index| &self.tags[index as usize]))
}
pub(crate) fn term(&self, index: usize) -> impl Iterator<Item = &T> {
self.sets.row(index).map(move |i| &self.tags[i as usize])
}
pub fn len(&self) -> usize {
self.sets.rows()
}
}
impl<T: Tag> DnfQuery<T> {
pub fn new(
terms: impl IntoIterator<Item = impl IntoIterator<Item = T>>,
) -> anyhow::Result<Self> {
let mut builder = DnfQueryBuilder::new();
for term in terms {
builder.push(term)?;
}
Ok(builder.result())
}
pub(crate) fn dnf_query(&self, dnf: &DnfQuery<T>, result: &mut [bool]) {
let translate = dnf
.tags
.iter()
.map(|tag| self.tags.binary_search(tag).map(|x| x as u32).ok())
.collect::<Box<_>>();
dnf_query0(&self.sets, dnf, &translate, result);
}
pub fn matching(&self, index: &TagIndex<T>) -> Vec<bool> {
let mut matching = vec![true; index.len()];
self.set_matching(index, &mut matching);
matching
}
pub fn set_matching(&self, index: &TagIndex<T>, matches: &mut [bool]) {
let mut tmp = vec![false; index.tags.sets.rows()];
for (matching, index) in matches.iter().zip(index.events.iter()) {
if *matching {
tmp[*index as usize] = true;
}
}
index.tags.dnf_query(self, &mut tmp);
for (matching, index) in matches.iter_mut().zip(index.events.iter()) {
*matching = *matching && tmp[*index as usize];
}
}
}
impl<T: Display> Display for DnfQuery<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let term_to_string = |term: Vec<&T>| -> String {
term.iter()
.map(ToString::to_string)
.collect::<Vec<_>>()
.join("&")
};
let res = self
.terms()
.map(|x| term_to_string(x.collect()))
.collect::<Vec<_>>()
.join(" | ");
f.write_str(&res)
}
}
struct DebugUsingDisplay<T>(Vec<T>);
impl<T: Display> fmt::Debug for DebugUsingDisplay<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_char('{')?;
for (i, x) in self.0.iter().enumerate() {
if i > 0 {
f.write_char(',')?;
}
Display::fmt(x, f)?;
}
f.write_char('}')?;
Ok(())
}
}
impl<T> FromIterator<T> for DebugUsingDisplay<T> {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
Self(Vec::from_iter(iter))
}
}
impl<T: Tag> TagIndex<T> {
pub fn new(
elements: impl IntoIterator<Item = impl IntoIterator<Item = T>>,
) -> anyhow::Result<Self> {
let mut builder = DnfQueryBuilder::new();
let events = elements
.into_iter()
.map(|set| builder.push(set))
.collect::<anyhow::Result<Vec<_>>>()?;
Ok(Self {
tags: builder.result(),
events,
})
}
pub fn get<C: FromIterator<T>>(&self, index: usize) -> Option<C> {
let mask_index = self.events.get(index)?;
let mask = self.tags.sets.row(*mask_index as usize);
let lut = &self.tags.tags;
Some(mask.map(|i| lut[i as usize].clone()).collect())
}
}
impl<T> TagIndex<T> {
pub fn distinct_tags(&self) -> &[T] {
&self.tags.tags
}
pub fn tags(&self) -> impl Iterator<Item = impl Iterator<Item = &T>> + '_ {
self.events
.iter()
.map(move |offset| self.tags.term(*offset as usize))
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn len(&self) -> usize {
self.events.len()
}
pub fn is_dense(&self) -> bool {
self.tags.sets.is_dense()
}
pub fn distinct_sets(&self) -> usize {
self.tags.sets.rows()
}
}
impl<T: Display> Display for TagIndex<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list()
.entries(
self.tags()
.map(|ts| ts.into_iter().collect::<DebugUsingDisplay<_>>()),
)
.finish()
}
}
struct SliceIntoIter<'a, T>(std::slice::IterMut<'a, T>);
impl<'a, T: Default + 'a> Iterator for SliceIntoIter<'a, T> {
type Item = T;
fn next(&mut self) -> Option<Self::Item> {
self.0.next().map(|x| {
let mut r = T::default();
swap(x, &mut r);
r
})
}
}
fn dnf_query0<T: Tag>(
index: &Bitmap,
dnf: &DnfQuery<T>,
translate: &[Option<u32>],
result: &mut [bool],
) {
match index {
Bitmap::Sparse(index) => {
let dnf = SparseBitmap::new(
dnf.sets
.iter()
.filter_map(|row| {
row.map(|index| translate[index as usize])
.collect::<Option<IndexSet>>()
})
.collect(),
);
for (set, value) in index.iter().zip(result.iter_mut()) {
*value = *value && { dnf.iter().any(move |query| query.is_subset(set)) }
}
}
Bitmap::Dense(index) => {
let dnf = DenseBitmap::new(
dnf.sets
.iter()
.filter_map(|row| {
mask_from_bits_iter(
row.map(|index| translate[index as usize].unwrap_or(MIN_SPARSE_INDEX)),
)
})
.collect(),
);
for (mask, value) in index.iter().zip(result.iter_mut()) {
*value = *value && { dnf.iter().any(move |query| is_subset(*query, *mask)) }
}
}
}
}
#[inline]
fn is_subset(a: IndexMask, b: IndexMask) -> bool {
a & b == a
}
pub(crate) struct TagSetSetIter<'a, T>(&'a [T], BitmapRowsIter<'a>);
impl<'a, T> Iterator for TagSetSetIter<'a, T> {
type Item = TagRefIter<'a, T>;
fn next(&mut self) -> Option<Self::Item> {
self.1.next().map(|iter| TagRefIter(self.0, iter))
}
}
pub(crate) struct TagRefIter<'a, T>(&'a [T], BitmapRowIter<'a>);
impl<'a, T> Iterator for TagRefIter<'a, T> {
type Item = &'a T;
fn next(&mut self) -> Option<Self::Item> {
self.1.next().map(|index| &self.0[index as usize])
}
}
#[derive(Debug)]
pub(crate) struct DnfQueryBuilder<T: Tag> {
tags: FnvHashMap<T, u32>,
sets: FnvHashMap<IndexSet, u32>,
}
impl<T: Tag> DnfQueryBuilder<T> {
pub fn new() -> Self {
Self {
tags: FnvHashMap::default(),
sets: Default::default(),
}
}
fn permutation_table(&self) -> Vec<u32> {
let mut tags = self.tags.iter().collect::<Vec<_>>();
tags.sort_unstable_by_key(|(key, _)| *key);
let mut permutation_table = vec![0u32; tags.len()];
for (i, (_, j)) in tags.into_iter().enumerate() {
permutation_table[*j as usize] = i as u32;
}
permutation_table
}
pub fn result(self) -> DnfQuery<T> {
let perm = self.permutation_table();
let mut tags = vec![None; self.tags.len()];
for (tag, index) in self.tags {
tags[perm[index as usize] as usize] = Some(tag)
}
let tags = tags.into_iter().flatten().collect();
let mut sets = vec![IndexSet::default(); self.sets.len()];
for (set, index) in self.sets {
sets[index as usize] = set
}
let sets = sets
.into_iter()
.map(|indexes| indexes.into_iter().map(|index| perm[index as usize]))
.collect();
DnfQuery { tags, sets }
}
pub fn push(&mut self, tags: impl IntoIterator<Item = T>) -> anyhow::Result<u32> {
let indices = tags.into_iter().map(|tag| self.add_tag(&tag));
let set = indices.collect::<anyhow::Result<IndexSet>>()?;
Ok(if let Some(index) = self.sets.get(&set) {
*index
} else {
let index = u32::try_from(self.sets.len())?;
self.sets.insert(set, index);
index
})
}
fn add_tag(&mut self, tag: &T) -> anyhow::Result<u32> {
Ok(if let Some(index) = self.tags.get(tag) {
*index
} else {
let index = u32::try_from(self.tags.len())?;
self.tags.insert(tag.clone(), index);
index
})
}
}
#[cfg(test)]
mod tests {
use libipld::{
cbor::DagCborCodec,
codec::{assert_roundtrip, Codec},
ipld, DagCbor,
};
use quickcheck::Arbitrary;
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, DagCbor)]
#[ipld(repr = "value")]
pub struct TestTag(pub String);
impl fmt::Display for TestTag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl fmt::Debug for TestTag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Arbitrary for TestTag {
fn arbitrary(g: &mut quickcheck::Gen) -> Self {
let tag = g.choose(TAG_NAMES).unwrap();
TestTag((*tag).to_owned())
}
}
const TAG_NAMES: &[&str] = &["a", "b", "c", "d", "e", "f"];
fn ts(tags: &str) -> TagSet<TestTag> {
tags.chars()
.filter(|c| char::is_alphanumeric(*c))
.map(|x| TestTag(x.to_string()))
.collect()
}
fn tss(tags: &str) -> Vec<TagSet<TestTag>> {
tags.split(',').map(ts).collect()
}
fn dnf(tags: &str) -> DnfQuery<TestTag> {
let parts = tags.split('|').map(ts).collect::<Vec<_>>();
DnfQuery::new(parts).unwrap()
}
fn ti(tags: &str) -> TagIndex<TestTag> {
TagIndex::new(tss(tags)).unwrap()
}
use super::*;
fn matches(index: &str, query: &str) -> String {
let index = ti(index);
let query = dnf(query);
let mut matches = vec![true; index.len()];
query.set_matching(&index, &mut matches);
matches.iter().map(|x| if *x { '1' } else { '0' }).collect()
}
#[test]
fn tag_index_query_tests() {
assert_eq!(&matches(" a,ab,bc, a", "ab"), "0100");
assert_eq!(&matches(" a, a, a, a", "ab"), "0000");
assert_eq!(&matches(" a, a, a,ab", "ab|c|d"), "0001");
}
#[test]
fn dnf_query() {
assert_eq!(&matches("ab,ac,bc", " ||"), "111");
assert_eq!(&matches("ab,ac,bc", " a"), "110");
assert_eq!(&matches("ab,ac,bc", " ab"), "100");
assert_eq!(&matches("ab,ac,bc", " ax"), "000");
assert_eq!(&matches("ab,ac,bc", " c"), "011");
assert_eq!(&matches("ab,bc,cd", "a|d"), "101");
}
#[quickcheck]
fn set_matching(index: TagIndex<TestTag>, query: DnfQuery<TestTag>) -> bool {
let mut bits1 = vec![true; index.len()];
let mut bits2 = vec![false; index.len()];
query.set_matching(&index, &mut bits1);
for (tags, matching) in index.tags().zip(bits2.iter_mut()) {
let tags: TagSet<TestTag> = tags.cloned().collect();
*matching = query
.terms()
.map(|x| x.cloned().collect::<TagSet<TestTag>>())
.any(|q| q.is_subset(&tags))
}
let bt = bits1
.iter()
.map(|x| if *x { '1' } else { '0' })
.collect::<String>();
println!("{} {} {}", index, query, bt);
bits1 == bits2
}
#[test]
fn dnf_query_ipld() {
let query = dnf("zab|bc|def|gh");
let expected = ipld! {
[["a", "b", "c", "d", "e", "f", "g", "h", "z"], [[0, 1, 8], [1, 2], [3, 4, 5], [6, 7]]]
};
let data = DagCborCodec.encode(&query).unwrap();
println!("{}", hex::encode(data));
assert_roundtrip(DagCborCodec, &query, &expected);
}
#[quickcheck]
fn dnf_query_cbor_roundtrip(value: DnfQuery<TestTag>) -> bool {
let bytes = DagCborCodec.encode(&value).unwrap();
let value1 = DagCborCodec.decode(&bytes).unwrap();
value == value1
}
#[quickcheck]
fn tag_index_cbor_roundtrip(value: TagIndex<TestTag>) -> bool {
let bytes = DagCborCodec.encode(&value).unwrap();
let value1 = DagCborCodec.decode(&bytes).unwrap();
value == value1
}
}