1use std::path::Path;
9
10use anyhow::{bail, Context, Result};
11use pthash::{DictionaryDictionary, Minimal, MurmurHash2_128, PartitionedPhf, Phf};
12
13use crate::graph::NodeId;
14use crate::java_compat::mph::gov::GOVMPH;
15use crate::map::{MappedPermutation, Permutation};
16use crate::utils::suffix_path;
17use crate::SWHID;
18
19pub trait SwhidMphf {
24 #[inline(always)]
26 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
27 self.hash_swhid(&(*swhid).try_into().ok()?)
28 }
29
30 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId>;
32
33 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId>;
35
36 #[inline(always)]
38 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
39 self.hash_str(swhid.to_string())
40 }
41}
42
43#[diagnostic::on_unimplemented(
44 label = "requires LoadableSwhidMphf (not just SwhidMphf)",
45 note = "swh-graph v7.0.0 split some methods of the SwhidMphf trait into a new LoadableSwhidMphf trait."
46)]
47pub trait LoadableSwhidMphf: SwhidMphf {
49 type WithMappedPermutation: SwhidMphf;
50
51 fn load(basepath: impl AsRef<Path>) -> Result<Self>
52 where
53 Self: Sized;
54
55 fn with_mapped_permutation(
57 self,
58 basepath: impl AsRef<Path>,
59 ) -> Result<Self::WithMappedPermutation>;
60}
61
62pub struct PermutedMphf<MPHF: SwhidMphf, P: Permutation> {
64 mphf: MPHF,
65 permutation: P,
66}
67
68impl<MPHF: SwhidMphf, P: Permutation> SwhidMphf for PermutedMphf<MPHF, P> {
69 #[inline(always)]
70 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
71 self.mphf
72 .hash_array(swhid)
73 .and_then(|id| self.permutation.get(id))
74 }
75
76 #[inline(always)]
77 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
78 self.mphf
79 .hash_str(swhid)
80 .and_then(|id| self.permutation.get(id))
81 }
82
83 #[inline(always)]
84 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
85 self.mphf
86 .hash_str_array(swhid)
87 .and_then(|id| self.permutation.get(id))
88 }
89
90 #[inline(always)]
91 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
92 self.mphf
93 .hash_swhid(swhid)
94 .and_then(|id| self.permutation.get(id))
95 }
96}
97
98pub struct VecMphf {
100 pub swhids: Vec<SWHID>,
101}
102
103impl SwhidMphf for VecMphf {
104 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
105 swhid
106 .as_ref()
107 .try_into()
108 .ok()
109 .and_then(|swhid: SWHID| self.hash_swhid(&swhid))
110 }
111
112 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
113 String::from_utf8(swhid.to_vec())
114 .ok()
115 .and_then(|swhid| self.hash_str(swhid))
116 }
117
118 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
119 self.swhids.iter().position(|item| item == swhid)
120 }
121}
122
123impl LoadableSwhidMphf for GOVMPH {
124 type WithMappedPermutation = PermutedMphf<Self, MappedPermutation>;
125
126 fn load(basepath: impl AsRef<Path>) -> Result<Self>
127 where
128 Self: Sized,
129 {
130 let path = suffix_path(basepath, ".cmph");
131 GOVMPH::load(&path).with_context(|| format!("Could not load {}", path.display()))
132 }
133
134 fn with_mapped_permutation(
135 self,
136 basepath: impl AsRef<Path>,
137 ) -> Result<Self::WithMappedPermutation> {
138 let suffix = ".order"; let permutation = MappedPermutation::load_unchecked(&suffix_path(&basepath, suffix))?;
140 Ok(PermutedMphf {
141 mphf: self,
142 permutation,
143 })
144 }
145}
146
147impl SwhidMphf for GOVMPH {
148 #[inline(always)]
149 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
150 Some(self.get_byte_array(swhid.as_ref().as_bytes()) as usize)
151 }
152
153 #[inline(always)]
154 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
155 Some(self.get_byte_array(swhid) as usize)
156 }
157}
158
159pub struct SwhidPthash(pub PartitionedPhf<Minimal, MurmurHash2_128, DictionaryDictionary>);
160
161impl SwhidPthash {
162 pub fn load(path: impl AsRef<Path>) -> Result<Self>
163 where
164 Self: Sized,
165 {
166 let path = path.as_ref();
167 PartitionedPhf::load(path)
168 .with_context(|| format!("Could not load {}", path.display()))
169 .map(SwhidPthash)
170 }
171}
172
173pub(crate) struct HashableSWHID<T: AsRef<[u8]>>(pub T);
175
176impl<T: AsRef<[u8]>> pthash::Hashable for HashableSWHID<T> {
177 type Bytes<'a>
178 = &'a T
179 where
180 T: 'a;
181 fn as_bytes(&self) -> Self::Bytes<'_> {
182 &self.0
183 }
184}
185
186impl LoadableSwhidMphf for SwhidPthash {
187 type WithMappedPermutation = PermutedMphf<Self, MappedPermutation>;
188
189 fn load(basepath: impl AsRef<Path>) -> Result<Self>
190 where
191 Self: Sized,
192 {
193 let path = suffix_path(basepath, ".pthash");
194 SwhidPthash::load(path)
195 }
196
197 fn with_mapped_permutation(
198 self,
199 basepath: impl AsRef<Path>,
200 ) -> Result<Self::WithMappedPermutation> {
201 let suffix = ".pthash.order";
202 let permutation = MappedPermutation::load_unchecked(&suffix_path(&basepath, suffix))?;
203 Ok(PermutedMphf {
204 mphf: self,
205 permutation,
206 })
207 }
208}
209
210impl SwhidMphf for SwhidPthash {
211 #[inline(always)]
212 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
213 Some(self.0.hash(HashableSWHID(swhid.as_ref().as_bytes())) as usize)
214 }
215
216 #[inline(always)]
217 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
218 Some(self.0.hash(HashableSWHID(swhid)) as usize)
219 }
220}
221
222pub enum DynMphf {
227 Pthash(SwhidPthash),
228 GOV(GOVMPH),
229}
230
231impl std::fmt::Debug for DynMphf {
232 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233 match self {
234 DynMphf::Pthash(_) => write!(f, "DynMphf::Pthash(_)"),
235 DynMphf::GOV(_) => write!(f, "DynMphf::GOV(_)"),
236 }
237 }
238}
239
240impl From<GOVMPH> for DynMphf {
241 #[inline(always)]
242 fn from(value: GOVMPH) -> DynMphf {
243 DynMphf::GOV(value)
244 }
245}
246
247impl From<SwhidPthash> for DynMphf {
248 #[inline(always)]
249 fn from(value: SwhidPthash) -> DynMphf {
250 DynMphf::Pthash(value)
251 }
252}
253
254impl LoadableSwhidMphf for DynMphf {
255 type WithMappedPermutation = PermutedDynMphf;
256
257 fn load(basepath: impl AsRef<Path>) -> Result<Self> {
258 let basepath = basepath.as_ref();
259
260 let pthash_path = suffix_path(basepath, ".pthash");
261 if pthash_path.exists() {
262 return LoadableSwhidMphf::load(basepath)
263 .map(Self::Pthash)
264 .with_context(|| format!("Could not load {}", pthash_path.display()));
265 }
266
267 let gov_path = suffix_path(basepath, ".cmph");
268 if gov_path.exists() {
269 return LoadableSwhidMphf::load(basepath)
270 .map(Self::GOV)
271 .with_context(|| format!("Could not load {}", gov_path.display()));
272 }
273
274 bail!(
275 "Cannot load MPH function, neither {} nor {} exists.",
276 pthash_path.display(),
277 gov_path.display()
278 );
279 }
280
281 fn with_mapped_permutation(
282 self,
283 basepath: impl AsRef<Path>,
284 ) -> Result<Self::WithMappedPermutation> {
285 match self {
286 Self::Pthash(mphf) => mphf
287 .with_mapped_permutation(basepath)
288 .map(PermutedDynMphf::Pthash),
289 Self::GOV(mphf) => mphf
290 .with_mapped_permutation(basepath)
291 .map(PermutedDynMphf::GOV),
292 }
293 }
294}
295
296impl SwhidMphf for DynMphf {
297 #[inline(always)]
298 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
299 match self {
300 Self::Pthash(mphf) => mphf.hash_array(swhid),
301 Self::GOV(mphf) => mphf.hash_array(swhid),
302 }
303 }
304
305 #[inline(always)]
306 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
307 match self {
308 Self::Pthash(mphf) => mphf.hash_str(swhid),
309 Self::GOV(mphf) => mphf.hash_str(swhid),
310 }
311 }
312
313 #[inline(always)]
314 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
315 match self {
316 Self::Pthash(mphf) => mphf.hash_str_array(swhid),
317 Self::GOV(mphf) => mphf.hash_str_array(swhid),
318 }
319 }
320
321 #[inline(always)]
322 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
323 match self {
324 Self::Pthash(mphf) => mphf.hash_swhid(swhid),
325 Self::GOV(mphf) => mphf.hash_swhid(swhid),
326 }
327 }
328}
329
330pub enum PermutedDynMphf {
332 Pthash(<SwhidPthash as LoadableSwhidMphf>::WithMappedPermutation),
333 GOV(<GOVMPH as LoadableSwhidMphf>::WithMappedPermutation),
334}
335
336impl SwhidMphf for PermutedDynMphf {
337 #[inline(always)]
338 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
339 match self {
340 Self::Pthash(mphf) => mphf.hash_array(swhid),
341 Self::GOV(mphf) => mphf.hash_array(swhid),
342 }
343 }
344
345 #[inline(always)]
346 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
347 match self {
348 Self::Pthash(mphf) => mphf.hash_str(swhid),
349 Self::GOV(mphf) => mphf.hash_str(swhid),
350 }
351 }
352
353 #[inline(always)]
354 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
355 match self {
356 Self::Pthash(mphf) => mphf.hash_str_array(swhid),
357 Self::GOV(mphf) => mphf.hash_str_array(swhid),
358 }
359 }
360
361 #[inline(always)]
362 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
363 match self {
364 Self::Pthash(mphf) => mphf.hash_swhid(swhid),
365 Self::GOV(mphf) => mphf.hash_swhid(swhid),
366 }
367 }
368}