1use std::path::Path;
9
10use anyhow::{bail, Context, Result};
11use pthash::{DictionaryDictionary, Minimal, MurmurHash2_128, PartitionedPhf, Phf};
12
13use crate::graph::NodeId;
14use crate::java_compat::mph::gov::GOVMPH;
15use crate::map::{MappedPermutation, Permutation};
16use crate::utils::suffix_path;
17use crate::SWHID;
18
19pub trait SwhidMphf {
24 #[inline(always)]
26 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
27 self.hash_swhid(&(*swhid).try_into().ok()?)
28 }
29
30 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId>;
32
33 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId>;
35
36 #[inline(always)]
38 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
39 self.hash_str(swhid.to_string())
40 }
41}
42
43#[diagnostic::on_unimplemented(
44 label = "requires LoadableSwhidMphf (not just SwhidMphf)",
45 note = "swh-graph v7.0.0 split some methods of the SwhidMphf trait into a new LoadableSwhidMphf trait."
46)]
47pub trait LoadableSwhidMphf: SwhidMphf {
49 type WithMappedPermutation: SwhidMphf;
50
51 fn load(basepath: impl AsRef<Path>) -> Result<Self>
52 where
53 Self: Sized;
54
55 fn with_mapped_permutation(
57 self,
58 basepath: impl AsRef<Path>,
59 ) -> Result<Self::WithMappedPermutation>;
60}
61
62pub struct PermutedMphf<MPHF: SwhidMphf, P: Permutation> {
64 mphf: MPHF,
65 permutation: P,
66}
67
68impl<MPHF: SwhidMphf, P: Permutation> SwhidMphf for PermutedMphf<MPHF, P> {
69 #[inline(always)]
70 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
71 self.mphf
72 .hash_array(swhid)
73 .and_then(|id| self.permutation.get(id))
74 }
75
76 #[inline(always)]
77 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
78 self.mphf
79 .hash_str(swhid)
80 .and_then(|id| self.permutation.get(id))
81 }
82
83 #[inline(always)]
84 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
85 self.mphf
86 .hash_str_array(swhid)
87 .and_then(|id| self.permutation.get(id))
88 }
89
90 #[inline(always)]
91 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
92 self.mphf
93 .hash_swhid(swhid)
94 .and_then(|id| self.permutation.get(id))
95 }
96}
97
98pub struct VecMphf {
100 pub swhids: Vec<SWHID>,
101}
102
103impl SwhidMphf for VecMphf {
104 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
105 swhid
106 .as_ref()
107 .try_into()
108 .ok()
109 .and_then(|swhid: SWHID| self.hash_swhid(&swhid))
110 }
111
112 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
113 String::from_utf8(swhid.to_vec())
114 .ok()
115 .and_then(|swhid| self.hash_str(swhid))
116 }
117
118 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
119 self.swhids.iter().position(|item| item == swhid)
120 }
121}
122
123impl LoadableSwhidMphf for GOVMPH {
124 type WithMappedPermutation = PermutedMphf<Self, MappedPermutation>;
125
126 fn load(basepath: impl AsRef<Path>) -> Result<Self>
127 where
128 Self: Sized,
129 {
130 let path = suffix_path(basepath, ".cmph");
131 GOVMPH::load(&path).with_context(|| format!("Could not load {}", path.display()))
132 }
133
134 fn with_mapped_permutation(
135 self,
136 basepath: impl AsRef<Path>,
137 ) -> Result<Self::WithMappedPermutation> {
138 let suffix = ".order"; let permutation = MappedPermutation::load_unchecked(&suffix_path(&basepath, suffix))?;
140 Ok(PermutedMphf {
141 mphf: self,
142 permutation,
143 })
144 }
145}
146
147impl SwhidMphf for GOVMPH {
148 #[inline(always)]
149 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
150 Some(self.get_byte_array(swhid.as_ref().as_bytes()) as usize)
151 }
152
153 #[inline(always)]
154 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
155 Some(self.get_byte_array(swhid) as usize)
156 }
157}
158
159pub struct SwhidPthash(pub PartitionedPhf<Minimal, MurmurHash2_128, DictionaryDictionary>);
160
161impl SwhidPthash {
162 pub fn load(path: impl AsRef<Path>) -> Result<Self>
163 where
164 Self: Sized,
165 {
166 let path = path.as_ref();
167 PartitionedPhf::load(path)
168 .with_context(|| format!("Could not load {}", path.display()))
169 .map(SwhidPthash)
170 }
171}
172
173pub(crate) struct HashableSWHID<T: AsRef<[u8]>>(pub T);
175
176impl<T: AsRef<[u8]>> pthash::Hashable for HashableSWHID<T> {
177 type Bytes<'a>
178 = &'a T
179 where
180 T: 'a;
181 fn as_bytes(&self) -> Self::Bytes<'_> {
182 &self.0
183 }
184}
185
186impl LoadableSwhidMphf for SwhidPthash {
187 type WithMappedPermutation = PermutedMphf<Self, MappedPermutation>;
188
189 fn load(basepath: impl AsRef<Path>) -> Result<Self>
190 where
191 Self: Sized,
192 {
193 let path = suffix_path(basepath, ".pthash");
194 SwhidPthash::load(path)
195 }
196
197 fn with_mapped_permutation(
198 self,
199 basepath: impl AsRef<Path>,
200 ) -> Result<Self::WithMappedPermutation> {
201 let suffix = ".pthash.order";
202 let permutation = MappedPermutation::load_unchecked(&suffix_path(&basepath, suffix))?;
203 Ok(PermutedMphf {
204 mphf: self,
205 permutation,
206 })
207 }
208}
209
210impl SwhidMphf for SwhidPthash {
211 #[inline(always)]
212 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
213 Some(self.0.hash(HashableSWHID(swhid.as_ref().as_bytes())) as usize)
214 }
215
216 #[inline(always)]
217 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
218 Some(self.0.hash(HashableSWHID(swhid)) as usize)
219 }
220}
221
222pub enum DynMphf {
227 Pthash(SwhidPthash),
228 GOV(GOVMPH),
229}
230
231impl std::fmt::Debug for DynMphf {
232 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233 match self {
234 DynMphf::Pthash(_) => write!(f, "DynMphf::Pthash(_)"),
235 DynMphf::GOV(_) => write!(f, "DynMphf::GOV(_)"),
236 }
237 }
238}
239
240impl From<GOVMPH> for DynMphf {
241 fn from(value: GOVMPH) -> DynMphf {
242 DynMphf::GOV(value)
243 }
244}
245
246impl From<SwhidPthash> for DynMphf {
247 fn from(value: SwhidPthash) -> DynMphf {
248 DynMphf::Pthash(value)
249 }
250}
251
252impl LoadableSwhidMphf for DynMphf {
253 type WithMappedPermutation = PermutedDynMphf;
254
255 fn load(basepath: impl AsRef<Path>) -> Result<Self> {
256 let basepath = basepath.as_ref();
257
258 let pthash_path = suffix_path(basepath, ".pthash");
259 if pthash_path.exists() {
260 return LoadableSwhidMphf::load(basepath)
261 .map(Self::Pthash)
262 .with_context(|| format!("Could not load {}", pthash_path.display()));
263 }
264
265 let gov_path = suffix_path(basepath, ".cmph");
266 if gov_path.exists() {
267 return LoadableSwhidMphf::load(basepath)
268 .map(Self::GOV)
269 .with_context(|| format!("Could not load {}", gov_path.display()));
270 }
271
272 bail!(
273 "Cannot load MPH function, neither {} nor {} exists.",
274 pthash_path.display(),
275 gov_path.display()
276 );
277 }
278
279 fn with_mapped_permutation(
280 self,
281 basepath: impl AsRef<Path>,
282 ) -> Result<Self::WithMappedPermutation> {
283 match self {
284 Self::Pthash(mphf) => mphf
285 .with_mapped_permutation(basepath)
286 .map(PermutedDynMphf::Pthash),
287 Self::GOV(mphf) => mphf
288 .with_mapped_permutation(basepath)
289 .map(PermutedDynMphf::GOV),
290 }
291 }
292}
293
294impl SwhidMphf for DynMphf {
295 #[inline(always)]
296 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
297 match self {
298 Self::Pthash(mphf) => mphf.hash_array(swhid),
299 Self::GOV(mphf) => mphf.hash_array(swhid),
300 }
301 }
302
303 #[inline(always)]
304 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
305 match self {
306 Self::Pthash(mphf) => mphf.hash_str(swhid),
307 Self::GOV(mphf) => mphf.hash_str(swhid),
308 }
309 }
310
311 #[inline(always)]
312 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
313 match self {
314 Self::Pthash(mphf) => mphf.hash_str_array(swhid),
315 Self::GOV(mphf) => mphf.hash_str_array(swhid),
316 }
317 }
318
319 #[inline(always)]
320 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
321 match self {
322 Self::Pthash(mphf) => mphf.hash_swhid(swhid),
323 Self::GOV(mphf) => mphf.hash_swhid(swhid),
324 }
325 }
326}
327
328pub enum PermutedDynMphf {
330 Pthash(<SwhidPthash as LoadableSwhidMphf>::WithMappedPermutation),
331 GOV(<GOVMPH as LoadableSwhidMphf>::WithMappedPermutation),
332}
333
334impl SwhidMphf for PermutedDynMphf {
335 #[inline(always)]
336 fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
337 match self {
338 Self::Pthash(mphf) => mphf.hash_array(swhid),
339 Self::GOV(mphf) => mphf.hash_array(swhid),
340 }
341 }
342
343 #[inline(always)]
344 fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
345 match self {
346 Self::Pthash(mphf) => mphf.hash_str(swhid),
347 Self::GOV(mphf) => mphf.hash_str(swhid),
348 }
349 }
350
351 #[inline(always)]
352 fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
353 match self {
354 Self::Pthash(mphf) => mphf.hash_str_array(swhid),
355 Self::GOV(mphf) => mphf.hash_str_array(swhid),
356 }
357 }
358
359 #[inline(always)]
360 fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
361 match self {
362 Self::Pthash(mphf) => mphf.hash_swhid(swhid),
363 Self::GOV(mphf) => mphf.hash_swhid(swhid),
364 }
365 }
366}