#![forbid(unsafe_code)]
#![warn(clippy::pedantic)]
#![warn(missing_docs)]
#![cfg_attr(docsrs, feature(doc_cfg))]
use std::borrow::Borrow;
use std::cmp::Reverse;
use std::hash::{BuildHasher, Hash};
use std::iter;
use hashbrown::hash_map;
pub use hashbrown::hash_map::DefaultHashBuilder;
use hashbrown::HashMap;
/// A histogram that counts how often each key of type `K` has been seen.
///
/// The counts live in a hash map from key to `usize` whose hasher builder is `S`.
///
/// `PartialEq` and `Eq` are implemented by hand instead of being derived:
/// a derive would add an `S: PartialEq` (resp. `S: Eq`) bound on the hasher
/// builder, while comparing the underlying maps only needs `S: BuildHasher`.
/// With the manual impls, histograms can be compared regardless of whether
/// their hasher builder is itself comparable.
#[derive(Debug, Clone)]
pub struct Histogram<K: Hash + Eq, S: BuildHasher = DefaultHashBuilder> {
    /// Number of recorded occurrences per key.
    map: HashMap<K, usize, S>,
}

impl<K: Hash + Eq, S: BuildHasher> PartialEq for Histogram<K, S> {
    /// Compares the underlying key-to-count maps; the hasher builders are not compared.
    fn eq(&self, other: &Self) -> bool {
        self.map == other.map
    }
}

impl<K: Hash + Eq, S: BuildHasher> Eq for Histogram<K, S> {}
impl<K: Hash + Eq> Histogram<K, DefaultHashBuilder> {
#[must_use]
pub fn new() -> Self {
Self {
map: HashMap::default(),
}
}
pub fn from_counts(iter: impl IntoIterator<Item = (K, usize)>) -> Self {
HashMap::from_iter(iter).into()
}
}
impl<K: Hash + Eq, S: BuildHasher> Histogram<K, S> {
pub const fn with_hasher(hash_builder: S) -> Self {
Self {
map: HashMap::with_hasher(hash_builder),
}
}
#[must_use]
pub fn num_categories(&self) -> usize {
self.map.len()
}
#[must_use]
pub fn num_instances(&self) -> usize {
self.map.values().sum()
}
pub fn add_ref<'a, Q>(&mut self, val: &'a Q)
where
K: Borrow<Q> + From<&'a Q>,
Q: ?Sized + Hash + Eq,
{
let cnt = self.map.entry_ref(val).or_insert(0);
*cnt += 1;
}
pub fn add_owned(&mut self, val: K) {
let cnt = self.map.entry(val).or_insert(0);
*cnt += 1;
}
pub fn extend_from_owned<I: IntoIterator<Item = K>>(&mut self, iter: I) {
for item in iter {
self.add_owned(item);
}
}
pub fn append(&mut self, other: Self) {
for (key, cnt) in other {
let old = self.map.entry(key).or_default();
*old += cnt;
}
}
pub fn count<Q>(&self, key: &Q) -> usize
where
Q: ?Sized + Hash + Eq,
K: Borrow<Q>,
{
self.map.get(key).copied().unwrap_or(0)
}
pub fn count_rel<Q>(&self, key: &Q) -> f64
where
Q: ?Sized + Hash + Eq,
K: Borrow<Q>,
{
let total = self.num_instances();
if total == 0 {
return 0.0;
}
#[allow(clippy::cast_precision_loss)]
{
self.count(key) as f64 / total as f64
}
}
pub fn iter(&self) -> impl Iterator<Item = (&K, usize)> {
self.into_iter()
}
pub fn iter_rel(&self) -> impl Iterator<Item = (&K, f64)> {
#[allow(clippy::cast_precision_loss)]
{
let total = self.num_instances() as f64;
self.iter().map(move |(k, cnt)| (k, cnt as f64 / total))
}
}
#[must_use]
pub fn sorted_occurrences(self) -> Vec<(K, usize)> {
let mut counts: Vec<_> = self.into_iter().collect();
counts.sort_unstable_by_key(|(_key, cnt)| Reverse(*cnt));
counts
}
pub fn into_std_hash_map(self) -> std::collections::HashMap<K, usize> {
self.map.into_iter().collect()
}
}
impl<K: Hash + Eq, S: BuildHasher + Default> Histogram<K, S> {
    /// Builds a histogram by counting every owned key produced by `iter`.
    ///
    /// Starts from an empty, default-constructed histogram and records one
    /// occurrence per item.
    pub fn from_owned_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = K>,
    {
        iter.into_iter().fold(Self::default(), |mut hist, item| {
            hist.add_owned(item);
            hist
        })
    }
}
impl<K: Hash + Eq, S: BuildHasher + Default> Default for Histogram<K, S> {
    /// Creates an empty histogram with a default-constructed hasher builder.
    fn default() -> Self {
        let map = HashMap::with_hasher(S::default());
        Self { map }
    }
}
impl<'a, K, S, Q> Extend<&'a Q> for Histogram<K, S>
where
    K: Hash + Eq + Borrow<Q> + From<&'a Q>,
    Q: ?Sized + Hash + Eq + 'a,
    S: BuildHasher,
{
    /// Records one occurrence for every reference produced by `iter`.
    fn extend<T: IntoIterator<Item = &'a Q>>(&mut self, iter: T) {
        iter.into_iter().for_each(|item| self.add_ref(item));
    }
}
impl<'a, K, S, Q> FromIterator<&'a Q> for Histogram<K, S>
where
    K: Hash + Eq + Borrow<Q> + From<&'a Q>,
    Q: ?Sized + Hash + Eq + 'a,
    S: BuildHasher + Default,
{
    /// Builds a histogram by counting every reference produced by `iter`,
    /// starting from an empty map with a default-constructed hasher builder.
    fn from_iter<T: IntoIterator<Item = &'a Q>>(iter: T) -> Self {
        let map = HashMap::with_hasher(S::default());
        let mut hist = Self { map };
        for item in iter {
            hist.add_ref(item);
        }
        hist
    }
}
impl<'a, K: Hash + Eq + 'a, S: BuildHasher> IntoIterator for &'a Histogram<K, S> {
    /// A borrowed key together with a copy of its count.
    type Item = (&'a K, usize);
    /// The map's borrowing iterator, adapted through a plain function pointer
    /// so that the iterator type can be named.
    type IntoIter = iter::Map<hash_map::Iter<'a, K, usize>, fn((&'a K, &'a usize)) -> Self::Item>;

    /// Iterates over `(key, count)` pairs without consuming the histogram.
    fn into_iter(self) -> Self::IntoIter {
        /// Turns a borrowed map entry into a borrowed key plus a copied count.
        fn copy_count<'m, T>(entry: (&'m T, &'m usize)) -> (&'m T, usize) {
            let (key, cnt) = entry;
            (key, *cnt)
        }

        // Bind as a function pointer so the result matches `Self::IntoIter`.
        let project: fn((&'a K, &'a usize)) -> (&'a K, usize) = copy_count;
        self.map.iter().map(project)
    }
}
impl<K: Hash + Eq, S: BuildHasher> IntoIterator for Histogram<K, S> {
    /// An owned key together with its count.
    type Item = (K, usize);
    /// The owning iterator of the underlying map.
    type IntoIter = hash_map::IntoIter<K, usize>;

    /// Consumes the histogram and iterates over its `(key, count)` pairs.
    fn into_iter(self) -> Self::IntoIter {
        let Self { map } = self;
        map.into_iter()
    }
}
impl<K: Hash + Eq, S: BuildHasher> From<HashMap<K, usize, S>> for Histogram<K, S> {
    /// Wraps an existing key-to-count map as a histogram without touching its entries.
    fn from(counts: HashMap<K, usize, S>) -> Self {
        Self { map: counts }
    }
}
impl<K: Hash + Eq, H: BuildHasher> From<Histogram<K, H>> for HashMap<K, usize, H> {
fn from(hist: Histogram<K, H>) -> Self {
hist.map
}
}
// `serde` support: a histogram is serialized and deserialized exactly like
// its underlying key-to-count map.
#[cfg(feature = "serde")]
mod serde {
    use std::hash::{BuildHasher, Hash};

    use hashbrown::HashMap;
    use serde::{Deserialize, Deserializer, Serialize, Serializer};

    use super::Histogram;

    #[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
    impl<K, S> Serialize for Histogram<K, S>
    where
        K: Hash + Eq + Serialize,
        S: BuildHasher,
    {
        /// Serializes the histogram by delegating to the underlying map.
        fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
        where
            Ser: Serializer,
        {
            Serialize::serialize(&self.map, serializer)
        }
    }

    #[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
    impl<'de, K, S> Deserialize<'de> for Histogram<K, S>
    where
        K: Hash + Eq + Deserialize<'de>,
        S: BuildHasher + Default,
    {
        /// Deserializes a key-to-count map and wraps it as a histogram.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            let map: HashMap<K, usize, S> = HashMap::deserialize(deserializer)?;
            Ok(Self { map })
        }
    }
}