#![warn(
clippy::all,
future_incompatible,
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
nonstandard_style,
rust_2018_idioms,
rust_2021_compatibility,
single_use_lifetimes,
trivial_casts,
unreachable_pub,
unused
)]
use std::{
borrow::Borrow,
collections::{hash_map, HashMap},
fmt::Debug,
hash::Hash,
};
use typed_index_collections::TiVec;
#[derive(Debug, Clone)]
pub struct AtomTable<T, I>
where
I: From<usize>,
{
vec: TiVec<I, T>,
map: HashMap<T, I>,
}
impl<V, I> AtomTable<V, I>
where
I: From<usize>,
{
#[must_use]
pub fn new() -> Self
where
I: Copy,
{
Self::default()
}
pub fn iter(&self) -> impl Iterator<Item = &V> {
self.vec.iter()
}
pub fn iter_enumerated(&self) -> impl Iterator<Item = (I, &V)> {
self.vec.iter_enumerated()
}
#[must_use]
pub fn get(&self, id: I) -> Option<&V>
where
usize: From<I>,
{
self.vec.get(id)
}
pub fn get_or_create_id_for_owned_value(&mut self, value: V) -> I
where
V: Clone + Hash + Eq,
I: Copy,
{
if let Some(id) = self.map.get(&value) {
return *id;
}
self.insert_known_unique_value(value)
}
pub fn get_or_create_id<Q>(&mut self, value: &Q) -> I
where
I: Copy,
V: Clone + Hash + Eq + Borrow<Q>,
Q: ?Sized + Hash + Eq + ToOwned<Owned = V>,
{
if let Some(id) = self.get_id(value) {
return id;
}
self.insert_known_unique_value(value.to_owned())
}
fn insert_known_unique_value(&mut self, value: V) -> I
where
V: Clone + Hash + Eq,
I: Copy,
{
let id = self.vec.push_and_get_key(value.clone());
let insert_result = self.map.insert(value, id);
assert!(insert_result.is_none());
id
}
#[must_use]
pub fn get_id<Q>(&self, value: &Q) -> Option<I>
where
I: Copy,
V: Hash + Eq + Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
self.map.get(value).copied()
}
}
impl<T, I> Default for AtomTable<T, I>
where
I: From<usize> + Copy,
{
fn default() -> Self {
Self {
vec: TiVec::default(),
map: HashMap::default(),
}
}
}
impl<V, I> PartialEq for AtomTable<V, I>
where
I: From<usize>,
V: PartialEq,
{
fn eq(&self, other: &Self) -> bool {
self.vec == other.vec
}
}
impl<V, I> Eq for AtomTable<V, I>
where
I: From<usize>,
V: Eq,
{
}
impl<V, I> IntoIterator for AtomTable<V, I>
where
I: From<usize>,
{
type Item = V;
type IntoIter = <TiVec<I, V> as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.vec.into_iter()
}
}
impl<V, I> Extend<V> for AtomTable<V, I>
where
V: Hash + Eq + Clone,
I: From<usize> + Copy,
{
fn extend<T: IntoIterator<Item = V>>(&mut self, iter: T) {
for val in iter {
let _ = self.get_or_create_id_for_owned_value(val);
}
}
}
impl<V, I> FromIterator<V> for AtomTable<V, I>
where
I: From<usize>,
V: Clone + Hash + Eq,
{
fn from_iter<T: IntoIterator<Item = V>>(iter: T) -> Self {
let mut map = HashMap::new();
let mut vec = TiVec::new();
for val in iter {
if let hash_map::Entry::Vacant(entry) = map.entry(val.clone()) {
let id = vec.push_and_get_key(val);
let _ = entry.insert(id);
}
}
Self { vec, map }
}
}
impl<V, I> From<AtomTable<V, I>> for Vec<V>
where
I: From<usize>,
{
fn from(value: AtomTable<V, I>) -> Self {
value.vec.into()
}
}
#[cfg(feature = "transform")]
#[derive(Debug, thiserror::Error, Clone, Copy, PartialEq, Eq)]
#[error("The transform function output the same value for two unique input values, so an atom table cannot be constructed from the outputs")]
pub struct NonUniqueTransformOutputError;
#[cfg(feature = "transform")]
#[derive(Debug, thiserror::Error)]
pub enum TransformResError<E> {
#[error("The transform function returned an error: {0}")]
TransformFunctionError(#[source] E),
#[error(transparent)]
NonUniqueOutput(#[from] NonUniqueTransformOutputError),
}
#[cfg(feature = "transform")]
impl<E> TransformResError<E> {
#[must_use]
pub const fn as_non_unique_output(&self) -> Option<NonUniqueTransformOutputError> {
if let Self::NonUniqueOutput(v) = self {
Some(*v)
} else {
None
}
}
#[must_use]
pub const fn as_transform_function_error(&self) -> Option<&E> {
if let Self::TransformFunctionError(v) = self {
Some(v)
} else {
None
}
}
}
#[cfg(feature = "transform")]
impl<V, I> AtomTable<V, I>
where
I: From<usize>,
{
pub fn try_transform<U: Hash + Eq + Clone>(
&self,
mut f: impl FnMut(&V) -> U,
) -> Result<AtomTable<U, I>, NonUniqueTransformOutputError>
where
I: Eq + Debug,
{
self.try_transform_res(move |val| -> Result<U, ()> { Ok(f(val)) })
.map_err(|e| {
e.as_non_unique_output()
.expect("Nowhere to introduce a different kind of error")
})
}
pub fn try_transform_res<U: Hash + Eq + Clone, E>(
&self,
mut f: impl FnMut(&V) -> Result<U, E>,
) -> Result<AtomTable<U, I>, TransformResError<E>>
where
I: Eq + Debug,
{
let mut vec: TiVec<I, U> = TiVec::default();
let mut map: HashMap<U, I> = HashMap::default();
vec.reserve(self.vec.len());
for (id, val) in self.vec.iter_enumerated() {
let new_val = f(val).map_err(TransformResError::TransformFunctionError)?;
let new_id = vec.push_and_get_key(new_val.clone());
assert_eq!(new_id, id);
let old_id_for_this_val = map.insert(new_val, new_id);
if old_id_for_this_val.is_some() {
return Err(TransformResError::from(NonUniqueTransformOutputError));
}
}
Ok(AtomTable { vec, map })
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct Id(usize);
impl From<usize> for Id {
fn from(value: usize) -> Self {
Self(value)
}
}
impl From<Id> for usize {
fn from(value: Id) -> Self {
value.0
}
}
#[test]
fn read_basics() {
let table: AtomTable<String, Id> = vec!["a", "b", "c"]
.into_iter()
.map(str::to_string)
.collect();
assert!(table.get_id("a").is_some());
let a = table.get_id("a").unwrap();
assert_eq!(table.get(a).unwrap(), "a");
assert!(table.get_id("b").is_some());
let b = table.get_id("b").unwrap();
assert_eq!(table.get(b).unwrap(), "b");
assert_ne!(a, b);
assert!(table.get_id("c").is_some());
let c = table.get_id("c").unwrap();
assert_eq!(table.get(c).unwrap(), "c");
assert_ne!(a, c);
assert_ne!(b, c);
assert!(table.get_id("d").is_none());
let bad_id = Id::from(usize::from(c) + 200);
assert_ne!(a, bad_id);
assert_ne!(b, bad_id);
assert_ne!(c, bad_id);
assert!(table.get(bad_id).is_none());
}
#[test]
fn mut_basics() {
let mut table: AtomTable<String, Id> = vec!["a", "b", "c"]
.into_iter()
.map(str::to_string)
.collect();
let string_a = "a".to_string();
let a = table.get_id(&string_a).unwrap();
assert_eq!(table.get_or_create_id(&string_a), a);
assert_eq!(table.get_or_create_id_for_owned_value(string_a.clone()), a);
let string_b = "b".to_string();
let b = table.get_id(&string_b).unwrap();
assert_eq!(table.get_or_create_id(&string_b), b);
assert_eq!(table.get_or_create_id_for_owned_value(string_b.clone()), b);
let string_c = "c";
let c = table.get_id(string_c).unwrap();
assert_eq!(table.get_or_create_id(string_c), c);
assert_eq!(
table.get_or_create_id_for_owned_value(string_c.to_owned()),
c
);
let string_d = "d".to_string();
let d = table.get_or_create_id(&string_d);
assert_ne!(a, d);
assert_ne!(b, d);
assert_ne!(c, d);
assert_eq!(table.get_id("d"), Some(d));
table.extend(vec![
"d".to_string(),
"e".to_string(),
"f".to_string(),
"g".to_string(),
]);
assert_eq!(table.get_id("d"), Some(d));
assert!(table.get_id("e").is_some());
assert!(table.get_id("f").is_some());
assert!(table.get_id("g").is_some());
let e = table.get_id("e").unwrap();
let f = table.get_id("f").unwrap();
let g = table.get_id("g").unwrap();
let ids = vec![a, b, c, d, e, f, g];
let unique_ids: HashSet<Id> = ids.iter().copied().collect();
assert_eq!(ids.len(), unique_ids.len());
}
fn extend_empty_using_abc_vec(
mut table: AtomTable<String, Id>,
) -> (AtomTable<String, Id>, Id, Id, Id) {
table.extend(vec!["a".to_string(), "b".to_string(), "c".to_string()]);
let a = table.get_id("a").unwrap();
let b = table.get_id("b").unwrap();
let c = table.get_id("c").unwrap();
assert_eq!(table.get(a).unwrap(), "a");
assert_eq!(table.get(b).unwrap(), "b");
assert_eq!(table.get(c).unwrap(), "c");
assert!(table.get_id("d").is_none());
(table, a, b, c)
}
#[test]
fn start_from_new() {
let (_table, _a, _b, _c) = extend_empty_using_abc_vec(AtomTable::new());
}
#[test]
fn start_from_default() {
let (_table, _a, _b, _c) = extend_empty_using_abc_vec(AtomTable::default());
}
#[test]
fn test_iteration() {
let (table, a, b, c) = extend_empty_using_abc_vec(AtomTable::default());
{
let values: Vec<&str> = table.iter().map(String::as_str).collect();
assert_eq!(values, vec!["a", "b", "c"]);
}
{
let values: Vec<(Id, &str)> = table
.iter_enumerated()
.map(|(id, s)| (id, s.as_str()))
.collect();
assert_eq!(values, vec![(a, "a"), (b, "b"), (c, "c")]);
assert_eq!(values, vec![(Id(0), "a"), (Id(1), "b"), (Id(2), "c")]);
}
{
let values: Vec<String> = table.into_iter().collect();
assert_eq!(values, vec!["a", "b", "c"]);
}
}
#[test]
fn test_comparison_and_cloning() {
let (mut table, a, b, c) = extend_empty_using_abc_vec(AtomTable::default());
let cloned_table = table.clone();
assert_eq!(cloned_table.get(a).unwrap(), "a");
assert_eq!(cloned_table.get(b).unwrap(), "b");
assert_eq!(cloned_table.get(c).unwrap(), "c");
assert!(cloned_table.get_id("d").is_none());
assert_eq!(table, cloned_table);
let d = table.get_or_create_id_for_owned_value("d".to_string());
assert_ne!(table, cloned_table);
assert_eq!(table.get_id("d").unwrap(), d);
assert!(cloned_table.get_id("d").is_none());
}
#[test]
fn test_conversion() {
let (table, _a, _b, _c) = extend_empty_using_abc_vec(AtomTable::default());
let converted_to_vec: Vec<String> = table.into();
assert_eq!(converted_to_vec, vec!["a", "b", "c"]);
}
#[test]
#[cfg(feature = "transform")]
fn transform() {
let table: AtomTable<String, Id> = vec!["a", "b", "c"]
.into_iter()
.map(str::to_string)
.collect();
let a = table.get_id("a").unwrap();
let b = table.get_id("b").unwrap();
let c = table.get_id("c").unwrap();
{
let uppercase_table = table
.try_transform(|s| s.to_uppercase())
.expect("This should not have any duplicate values");
assert_eq!(uppercase_table.get(a).unwrap(), "A");
assert_eq!(uppercase_table.get(b).unwrap(), "B");
assert_eq!(uppercase_table.get(c).unwrap(), "C");
}
{
let result = table.try_transform(|s| s == "b");
assert!(result.is_err());
assert_eq!(result.unwrap_err(), NonUniqueTransformOutputError);
}
}
#[test]
#[cfg(feature = "transform")]
fn transform_res() {
#[derive(Debug, PartialEq, Eq, Clone, Copy, thiserror::Error)]
#[error("We do not like the letter B in this collection, to give us a reason to return an error")]
struct DoNotLikeLetterB;
let table: AtomTable<String, Id> = vec!["a", "b", "c"]
.into_iter()
.map(str::to_string)
.collect();
let a = table.get_id("a").unwrap();
let b = table.get_id("b").unwrap();
let c = table.get_id("c").unwrap();
{
let uppercase_table = table
.try_transform_res(|s| -> Result<String, ()> { Ok(s.to_uppercase()) })
.expect("This should not have any duplicate values or errors");
assert_eq!(uppercase_table.get(a).unwrap(), "A");
assert_eq!(uppercase_table.get(b).unwrap(), "B");
assert_eq!(uppercase_table.get(c).unwrap(), "C");
}
{
let result = table.try_transform_res(|s| {
if s == "b" {
Err(DoNotLikeLetterB)
} else {
Ok(s.to_uppercase())
}
});
assert!(result.is_err());
assert_eq!(
*result
.as_ref()
.unwrap_err()
.as_transform_function_error()
.unwrap(),
DoNotLikeLetterB
);
assert!(result
.as_ref()
.unwrap_err()
.to_string()
.contains("do not like the letter B"));
assert!(result
.as_ref()
.unwrap_err()
.to_string()
.contains("The transform function returned an error"));
assert!(result.unwrap_err().as_non_unique_output().is_none());
}
{
let result = table.try_transform_res(|s| -> Result<bool, ()> { Ok(s == "b") });
assert!(result.is_err());
assert!(result
.as_ref()
.unwrap_err()
.as_non_unique_output()
.is_some());
assert!(result
.as_ref()
.unwrap_err()
.as_non_unique_output()
.unwrap()
.to_string()
.contains("The transform function output the same value"));
assert!(result.unwrap_err().as_transform_function_error().is_none());
}
}
}