forest/cid_collections/
hash_set.rs1use super::*;
5use anyhow::Context as _;
6use bytes::Bytes;
7use cid::Cid;
8
9#[cfg(doc)]
10use std::collections::HashSet;
11use std::{path::Path, sync::LazyLock};
12
13pub trait CidHashSetLike {
14 fn insert(&mut self, cid: Cid) -> anyhow::Result<bool>;
18}
19
20#[derive(Default, Clone, Debug, PartialEq, Eq)]
24pub struct CidHashSet {
25 inner: CidHashMap<()>,
26}
27
28impl CidHashSet {
29 pub fn new() -> Self {
33 Self::default()
34 }
35
36 pub fn insert(&mut self, cid: Cid) -> bool {
42 self.inner.insert(cid, ()).is_none()
43 }
44
45 pub fn len(&self) -> usize {
47 self.inner.len()
48 }
49
50 #[allow(dead_code)]
52 pub fn contains(&self, cid: &Cid) -> bool {
53 self.inner.contains_key(cid)
54 }
55
56 #[allow(dead_code)]
58 pub fn remove(&mut self, cid: &Cid) -> bool {
59 self.inner.remove(cid).is_some()
60 }
61
62 #[allow(dead_code)]
64 pub fn is_empty(&self) -> bool {
65 self.inner.is_empty()
66 }
67}
68
69impl CidHashSetLike for CidHashSet {
70 fn insert(&mut self, cid: Cid) -> anyhow::Result<bool> {
71 Ok(self.insert(cid))
72 }
73}
74
75impl Extend<Cid> for CidHashSet {
80 fn extend<T: IntoIterator<Item = Cid>>(&mut self, iter: T) {
81 self.inner.extend(iter.into_iter().map(|it| (it, ())))
82 }
83}
84
85impl FromIterator<Cid> for CidHashSet {
86 fn from_iter<T: IntoIterator<Item = Cid>>(iter: T) -> Self {
87 let mut this = Self::new();
88 this.extend(iter);
89 this
90 }
91}
92
93pub struct FileBackedCidHashSet {
96 db: parity_db::Db,
97 _dir: tempfile::TempDir,
99 lru: hashlink::LruCache<SmallCid, ()>,
100}
101
102impl FileBackedCidHashSet {
103 pub fn new(temp_dir_root: impl AsRef<Path>) -> anyhow::Result<Self> {
104 let dir = tempfile::tempdir_in(temp_dir_root.as_ref()).with_context(|| {
105 format!(
106 "failed to create temp dir in {}",
107 temp_dir_root.as_ref().display(),
108 )
109 })?;
110 let options = parity_db::Options {
111 path: dir.path().to_path_buf(),
112 sync_wal: false,
113 sync_data: false,
114 stats: false,
115 salt: None,
116 columns: vec![
117 parity_db::ColumnOptions {
118 uniform: true,
119 append_only: true,
120 ..Default::default()
121 },
122 parity_db::ColumnOptions {
123 append_only: true,
124 ..Default::default()
125 },
126 ],
127 compression_threshold: Default::default(),
128 };
129 let db = parity_db::Db::open_or_create(&options).with_context(|| {
130 format!(
131 "failed to create temp parity-db at {}",
132 options.path.display()
133 )
134 })?;
135 Ok(Self {
136 db,
137 _dir: dir,
138 #[allow(clippy::disallowed_methods)]
139 lru: hashlink::LruCache::new(2 << 19), })
141 }
142
143 pub fn new_in_temp_dir() -> anyhow::Result<Self> {
144 Self::new(std::env::temp_dir())
145 }
146}
147
148impl CidHashSetLike for FileBackedCidHashSet {
149 fn insert(&mut self, cid: Cid) -> anyhow::Result<bool> {
150 static EMPTY_VALUE: LazyLock<Bytes> = LazyLock::new(|| Bytes::from_static(&[]));
151
152 let small = SmallCid::from(cid);
153 if self.lru.get(&small).is_some() {
154 return Ok(false);
155 }
156
157 let (col, key) = match &small {
158 SmallCid::Inline(c) => (0, c.digest().to_vec()),
159 SmallCid::Indirect(u) => (1, u.inner().to_bytes()),
160 };
161 if self.db.get(col, &key).ok().flatten().is_some() {
162 self.lru.insert(small, ());
163 Ok(false)
164 } else {
165 self.db.commit_changes_bytes([(
166 col,
167 parity_db::Operation::Set(key, EMPTY_VALUE.clone()),
168 )])?;
169 self.lru.insert(small, ());
170 Ok(true)
171 }
172 }
173}
174
175#[cfg(test)]
176impl Default for FileBackedCidHashSet {
177 fn default() -> Self {
178 Self::new_in_temp_dir().expect("failed to create FileBackedCidHashSet")
179 }
180}
181
182#[cfg(test)]
183mod tests {
184 use super::*;
185 use ahash::HashSet;
186
187 #[quickcheck_macros::quickcheck]
188 fn test_cid_hashset(cids: HashSet<Cid>) {
189 let mut set = CidHashSet::default();
190 for cid in cids.iter() {
191 all_asserts::assert_true!(set.insert(*cid), "expected CID to be newly inserted");
192 }
193 for cid in cids.iter() {
194 all_asserts::assert_false!(set.insert(*cid), "expected CID to be present in the set");
195 }
196 }
197
198 #[quickcheck_macros::quickcheck]
199 fn test_file_backed_cid_hashset(cids: HashSet<Cid>) {
200 let mut set = FileBackedCidHashSet::default();
201 let dir = set._dir.path().to_path_buf();
202 for cid in cids.iter() {
203 all_asserts::assert_true!(
204 set.insert(*cid).unwrap(),
205 "expected CID to be newly inserted"
206 );
207 }
208 for cid in cids.iter() {
209 all_asserts::assert_false!(
210 set.insert(*cid).unwrap(),
211 "expected CID to be present in the set"
212 );
213 }
214 drop(set);
215 all_asserts::assert_false!(dir.exists(), "expected temporary directory to be deleted");
216 }
217}