use crate::merge_state::InPlaceMergeState;
use crate::{
binary_merge::{EarlyOut, MergeOperation},
dedup::{sort_and_dedup_by_key, Keep},
iterators::SliceIterator,
merge_state::{MergeStateMut, SmallVecMergeState},
VecSet,
};
#[cfg(feature = "serde")]
use core::marker::PhantomData;
use core::{borrow::Borrow, cmp::Ordering, fmt, fmt::Debug, hash, hash::Hash, iter::FromIterator};
#[cfg(feature = "serde")]
use serde::{
de::{Deserialize, Deserializer, MapAccess, Visitor},
ser::{Serialize, SerializeMap, Serializer},
};
use smallvec::{Array, SmallVec};
use std::collections::BTreeMap;
pub struct VecMap<A: Array>(SmallVec<A>);
pub type VecMap1<K, V> = VecMap<[(K, V); 1]>;
impl<T: Debug, A: Array<Item = T>> Debug for VecMap<A> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_set().entries(self.as_slice().iter()).finish()
}
}
impl<T: Clone, A: Array<Item = T>> Clone for VecMap<A> {
fn clone(&self) -> Self {
Self(self.0.clone())
}
}
impl<T: Hash, A: Array<Item = T>> Hash for VecMap<A> {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.0.hash(state)
}
}
impl<T: PartialEq, A: Array<Item = T>> PartialEq for VecMap<A> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<T: Eq, A: Array<Item = T>> Eq for VecMap<A> {}
impl<T: PartialOrd, A: Array<Item = T>> PartialOrd for VecMap<A> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.0.partial_cmp(&other.0)
}
}
impl<T: Ord, A: Array<Item = T>> Ord for VecMap<A> {
fn cmp(&self, other: &Self) -> Ordering {
self.0.cmp(&other.0)
}
}
impl<'a, A: Array> IntoIterator for &'a VecMap<A> {
type Item = &'a A::Item;
type IntoIter = core::slice::Iter<'a, A::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
impl<A: Array> IntoIterator for VecMap<A> {
type Item = A::Item;
type IntoIter = smallvec::IntoIter<A>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<A: Array> Default for VecMap<A> {
fn default() -> Self {
VecMap(SmallVec::default())
}
}
impl<A: Array> From<VecMap<A>> for VecSet<A> {
fn from(value: VecMap<A>) -> Self {
VecSet::new_unsafe(value.0)
}
}
struct CombineOp<F, K>(F, std::marker::PhantomData<K>);
impl<K: Ord, V, A: Array<Item = (K, V)>, B: Array<Item = (K, V)>, F: Fn(V, V) -> V>
MergeOperation<InPlaceMergeState<A, B>> for CombineOp<F, K>
{
fn cmp(&self, a: &(K, V), b: &(K, V)) -> Ordering {
a.0.cmp(&b.0)
}
fn from_a(&self, m: &mut InPlaceMergeState<A, B>, n: usize) -> EarlyOut {
m.advance_a(n, true)
}
fn from_b(&self, m: &mut InPlaceMergeState<A, B>, n: usize) -> EarlyOut {
m.advance_b(n, true)
}
fn collision(&self, m: &mut InPlaceMergeState<A, B>) -> EarlyOut {
if let (Some((ak, av)), Some((_, bv))) = (m.a.pop_front(), m.b.next()) {
let r = (self.0)(av, bv);
m.a.push((ak, r));
}
Some(())
}
}
struct RightBiasedUnionOp;
impl<'a, K: Ord, V, I: MergeStateMut<A = (K, V), B = (K, V)>> MergeOperation<I>
for RightBiasedUnionOp
{
fn cmp(&self, a: &(K, V), b: &(K, V)) -> Ordering {
a.0.cmp(&b.0)
}
fn from_a(&self, m: &mut I, n: usize) -> EarlyOut {
m.advance_a(n, true)
}
fn from_b(&self, m: &mut I, n: usize) -> EarlyOut {
m.advance_b(n, true)
}
fn collision(&self, m: &mut I) -> EarlyOut {
m.advance_a(1, false)?;
m.advance_b(1, true)
}
}
pub enum OuterJoinArg<K, A, B> {
Left(K, A),
Right(K, B),
Both(K, A, B),
}
struct OuterJoinOp<F>(F);
struct LeftJoinOp<F>(F);
struct RightJoinOp<F>(F);
struct InnerJoinOp<F>(F);
impl<K: Ord, V, A: Array<Item = (K, V)>> FromIterator<(K, V)> for VecMap<A> {
fn from_iter<I: IntoIterator<Item = A::Item>>(iter: I) -> Self {
VecMap(sort_and_dedup_by_key(iter.into_iter(), |(k, _)| k, Keep::Last).into())
}
}
impl<K, V, A: Array<Item = (K, V)>> From<BTreeMap<K, V>> for VecMap<A> {
fn from(value: BTreeMap<K, V>) -> Self {
Self::new(value.into_iter().collect())
}
}
impl<K: Ord + 'static, V, A: Array<Item = (K, V)>> Extend<A::Item> for VecMap<A> {
fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
self.merge_with::<A>(iter.into_iter().collect());
}
}
impl<A: Array> AsRef<[A::Item]> for VecMap<A> {
fn as_ref(&self) -> &[A::Item] {
self.as_slice()
}
}
impl<A: Array> Into<SmallVec<A>> for VecMap<A> {
fn into(self) -> SmallVec<A> {
self.0
}
}
impl<
'a,
K: Ord + Clone,
A,
B,
R,
Arr: Array<Item = (K, R)>,
F: Fn(OuterJoinArg<&K, &A, &B>) -> Option<R>,
> MergeOperation<SmallVecMergeState<'a, (K, A), (K, B), Arr>> for OuterJoinOp<F>
{
fn cmp(&self, a: &(K, A), b: &(K, B)) -> Ordering {
a.0.cmp(&b.0)
}
fn from_a(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
for _ in 0..n {
if let Some((k, a)) = m.a.next() {
let arg = OuterJoinArg::Left(k, a);
if let Some(res) = (self.0)(arg) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
fn from_b(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
for _ in 0..n {
if let Some((k, b)) = m.b.next() {
let arg = OuterJoinArg::Right(k, b);
if let Some(res) = (self.0)(arg) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
fn collision(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>) -> EarlyOut {
if let Some((k, a)) = m.a.next() {
if let Some((_, b)) = m.b.next() {
let arg = OuterJoinArg::Both(k, a, b);
if let Some(res) = (self.0)(arg) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
}
impl<
'a,
K: Ord + Clone,
A,
B,
R,
Arr: Array<Item = (K, R)>,
F: Fn(&K, &A, Option<&B>) -> Option<R>,
> MergeOperation<SmallVecMergeState<'a, (K, A), (K, B), Arr>> for LeftJoinOp<F>
{
fn cmp(&self, a: &(K, A), b: &(K, B)) -> Ordering {
a.0.cmp(&b.0)
}
fn from_a(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
for _ in 0..n {
if let Some((k, a)) = m.a.next() {
if let Some(res) = (self.0)(k, a, None) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
fn from_b(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
m.b.drop_front(n);
Some(())
}
fn collision(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>) -> EarlyOut {
if let Some((k, a)) = m.a.next() {
if let Some((_, b)) = m.b.next() {
if let Some(res) = (self.0)(k, a, Some(b)) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
}
impl<
'a,
K: Ord + Clone,
A,
B,
R,
Arr: Array<Item = (K, R)>,
F: Fn(&K, Option<&A>, &B) -> Option<R>,
> MergeOperation<SmallVecMergeState<'a, (K, A), (K, B), Arr>> for RightJoinOp<F>
{
fn cmp(&self, a: &(K, A), b: &(K, B)) -> Ordering {
a.0.cmp(&b.0)
}
fn from_a(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
m.a.drop_front(n);
Some(())
}
fn from_b(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
for _ in 0..n {
if let Some((k, b)) = m.b.next() {
if let Some(res) = (self.0)(k, None, b) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
fn collision(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>) -> EarlyOut {
if let Some((k, a)) = m.a.next() {
if let Some((_, b)) = m.b.next() {
if let Some(res) = (self.0)(k, Some(a), b) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
}
impl<'a, K: Ord + Clone, A, B, R, Arr: Array<Item = (K, R)>, F: Fn(&K, &A, &B) -> Option<R>>
MergeOperation<SmallVecMergeState<'a, (K, A), (K, B), Arr>> for InnerJoinOp<F>
{
fn cmp(&self, a: &(K, A), b: &(K, B)) -> Ordering {
a.0.cmp(&b.0)
}
fn from_a(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
m.a.drop_front(n);
Some(())
}
fn from_b(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>, n: usize) -> EarlyOut {
m.b.drop_front(n);
Some(())
}
fn collision(&self, m: &mut SmallVecMergeState<'a, (K, A), (K, B), Arr>) -> EarlyOut {
if let Some((k, a)) = m.a.next() {
if let Some((_, b)) = m.b.next() {
if let Some(res) = (self.0)(k, a, b) {
m.r.push((k.clone(), res));
}
}
}
Some(())
}
}
impl<K, V, A: Array<Item = (K, V)>> VecMap<A> {
pub fn map_values<R, B: Array<Item = (K, R)>, F: FnMut(V) -> R>(self, mut f: F) -> VecMap<B> {
VecMap::new(
self.0
.into_iter()
.map(|entry| (entry.0, f(entry.1)))
.collect(),
)
}
}
impl<A: Array> VecMap<A> {
pub(crate) fn new(value: SmallVec<A>) -> Self {
Self(value)
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn empty() -> Self {
Self(SmallVec::new())
}
pub fn len(&self) -> usize {
self.0.len()
}
fn as_slice(&self) -> &[A::Item] {
self.0.as_ref()
}
pub fn retain<F: FnMut(&A::Item) -> bool>(&mut self, mut f: F) {
self.0.retain(|entry| f(entry))
}
pub(crate) fn slice_iter(&self) -> SliceIterator<A::Item> {
SliceIterator(self.0.as_slice())
}
pub fn into_inner(self) -> SmallVec<A> {
self.0
}
pub fn single(item: A::Item) -> Self {
Self(smallvec::smallvec![item])
}
}
impl<K: Ord + 'static, V, A: Array<Item = (K, V)>> VecMap<A> {
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
match self.0.binary_search_by(|(k, _)| k.cmp(&key)) {
Ok(index) => {
let mut elem = (key, value);
std::mem::swap(&mut elem, &mut self.0[index]);
Some(elem.1)
}
Err(ip) => {
self.0.insert(ip, (key, value));
None
}
}
}
pub fn merge_with<B: Array<Item = (K, V)>>(&mut self, rhs: VecMap<B>) {
InPlaceMergeState::merge(&mut self.0, rhs.0, RightBiasedUnionOp)
}
pub fn combine_with<B: Array<Item = A::Item>, F: Fn(V, V) -> V>(
&mut self,
that: VecMap<B>,
f: F,
) {
InPlaceMergeState::merge(&mut self.0, that.0, CombineOp(f, core::marker::PhantomData));
}
}
impl<K: Ord + 'static, V, A: Array<Item = (K, V)>> VecMap<A> {
pub fn get<Q>(&self, key: &Q) -> Option<&V>
where
K: Borrow<Q>,
Q: Ord + ?Sized,
{
let elements = self.0.as_slice();
elements
.binary_search_by(|p| p.0.borrow().cmp(key))
.map(|index| &elements[index].1)
.ok()
}
pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
where
K: Borrow<Q>,
Q: Ord + ?Sized,
{
let elements = self.0.as_mut_slice();
match elements.binary_search_by(|p| p.0.borrow().cmp(key)) {
Ok(index) => Some(&mut elements[index].1),
Err(_) => None,
}
}
}
impl<K: Ord + Clone, V: Clone, A: Array<Item = (K, V)>> VecMap<A> {
pub fn outer_join<
W: Clone,
R,
F: Fn(OuterJoinArg<&K, &V, &W>) -> Option<R>,
B: Array<Item = (K, W)>,
>(
&self,
that: &VecMap<B>,
f: F,
) -> VecMap1<K, R> {
VecMap1::<K, R>::new(SmallVecMergeState::merge(
self.0.as_slice(),
that.0.as_slice(),
OuterJoinOp(f),
))
}
pub fn left_join<
W: Clone,
R,
F: Fn(&K, &V, Option<&W>) -> Option<R>,
B: Array<Item = (K, W)>,
>(
&self,
that: &VecMap1<K, W>,
f: F,
) -> VecMap1<K, R> {
VecMap1::<K, R>::new(SmallVecMergeState::merge(
self.0.as_slice(),
that.0.as_slice(),
LeftJoinOp(f),
))
}
pub fn right_join<
W: Clone,
R,
F: Fn(&K, Option<&V>, &W) -> Option<R>,
B: Array<Item = (K, W)>,
>(
&self,
that: &VecMap<B>,
f: F,
) -> VecMap1<K, R> {
VecMap1::<K, R>::new(SmallVecMergeState::merge(
self.0.as_slice(),
that.0.as_slice(),
RightJoinOp(f),
))
}
pub fn inner_join<W: Clone, R, F: Fn(&K, &V, &W) -> Option<R>, B: Array<Item = (K, W)>>(
&self,
that: &VecMap<B>,
f: F,
) -> VecMap1<K, R> {
VecMap1::<K, R>::new(SmallVecMergeState::merge(
self.0.as_slice(),
that.0.as_slice(),
InnerJoinOp(f),
))
}
}
#[cfg(feature = "serde")]
impl<K, V, A: Array<Item = (K, V)>> Serialize for VecMap<A>
where
K: Serialize,
V: Serialize,
{
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut state = serializer.serialize_map(Some(self.len()))?;
for (k, v) in self.0.iter() {
state.serialize_entry(&k, &v)?;
}
state.end()
}
}
#[cfg(feature = "serde")]
impl<'de, K, V, A: Array<Item = (K, V)>> Deserialize<'de> for VecMap<A>
where
K: Deserialize<'de> + Ord + PartialEq + Clone,
V: Deserialize<'de>,
{
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
deserializer.deserialize_map(VecMapVisitor {
phantom: PhantomData,
})
}
}
#[cfg(feature = "serde")]
struct VecMapVisitor<K, V, A> {
phantom: PhantomData<(K, V, A)>,
}
#[cfg(feature = "serde")]
impl<'de, K, V, A> Visitor<'de> for VecMapVisitor<K, V, A>
where
A: Array<Item = (K, V)>,
K: Deserialize<'de> + Ord + PartialEq + Clone,
V: Deserialize<'de>,
{
type Value = VecMap<A>;
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("a map")
}
fn visit_map<M: MapAccess<'de>>(self, mut map: M) -> Result<Self::Value, M::Error> {
let len = map.size_hint().unwrap_or(0);
let mut values: SmallVec<A> = SmallVec::with_capacity(len);
while let Some(value) = map.next_entry::<K, V>()? {
values.push(value);
}
values.sort_by_key(|x: &(K, V)| x.0.clone());
values.dedup_by_key(|x: &mut (K, V)| x.0.clone());
Ok(VecMap(values))
}
}
#[cfg(test)]
mod tests {
use super::*;
use maplit::btreemap;
use quickcheck::*;
use std::collections::BTreeMap;
use OuterJoinArg::*;
type Test = VecMap1<i32, i32>;
type Ref = BTreeMap<i32, i32>;
impl<K: Arbitrary + Ord, V: Arbitrary> Arbitrary for VecMap1<K, V> {
fn arbitrary<G: Gen>(g: &mut G) -> Self {
let t: BTreeMap<K, V> = Arbitrary::arbitrary(g);
t.into()
}
}
fn outer_join_reference(a: &Ref, b: &Ref) -> Ref {
let mut r = a.clone();
for (k, v) in b.clone().into_iter() {
r.insert(k, v);
}
r
}
fn inner_join_reference(a: &Ref, b: &Ref) -> Ref {
let mut r: Ref = BTreeMap::new();
for (k, v) in a.clone().into_iter() {
if b.contains_key(&k) {
r.insert(k, v);
}
}
r
}
quickcheck! {
#[cfg(feature = "serde")]
fn serde_roundtrip(reference: Ref) -> bool {
let bytes = serde_json::to_vec(&reference).unwrap();
let deser = serde_json::from_slice(&bytes).unwrap();
reference == deser
}
fn outer_join(a: Ref, b: Ref) -> bool {
let expected: Test = outer_join_reference(&a, &b).into();
let a: Test = a.into();
let b: Test = b.into();
let actual = a.outer_join(&b, |arg| Some(match arg {
Left(_, a) => a.clone(),
Right(_, b) => b.clone(),
Both(_, _, b) => b.clone(),
}));
expected == actual
}
fn inner_join(a: Ref, b: Ref) -> bool {
let expected: Test = inner_join_reference(&a, &b).into();
let a: Test = a.into();
let b: Test = b.into();
let actual = a.inner_join(&b, |_, a,_| Some(a.clone()));
expected == actual
}
}
#[test]
fn smoke_test() {
let a = btreemap! {
1 => 1,
2 => 3,
};
let b = btreemap! {
1 => 2,
3 => 4,
};
let r = outer_join_reference(&a, &b);
let a: Test = a.into();
let b: Test = b.into();
let expected: Test = r.into();
let actual = a.outer_join(&b, |arg| {
Some(match arg {
Left(_, a) => a.clone(),
Right(_, b) => b.clone(),
Both(_, _, b) => b.clone(),
})
});
assert_eq!(actual, expected);
println!("{:?}", actual);
}
}