use std::path::Path;
use anyhow::{bail, Context, Result};
use pthash::{DictionaryDictionary, Minimal, MurmurHash2_128, PartitionedPhf, Phf};
use crate::graph::NodeId;
use crate::java_compat::mph::gov::GOVMPH;
use crate::map::{MappedPermutation, Permutation};
use crate::utils::suffix_path;
use crate::SWHID;
pub trait SwhidMphf {
#[inline(always)]
fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
self.hash_swhid(&(*swhid).try_into().ok()?)
}
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId>;
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId>;
#[inline(always)]
fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
self.hash_str(swhid.to_string())
}
}
#[diagnostic::on_unimplemented(
label = "requires LoadableSwhidMphf (not just SwhidMphf)",
note = "swh-graph v7.0.0 split some methods of the SwhidMphf trait into a new LoadableSwhidMphf trait."
)]
pub trait LoadableSwhidMphf: SwhidMphf {
type WithMappedPermutation: SwhidMphf;
fn load(basepath: impl AsRef<Path>) -> Result<Self>
where
Self: Sized;
fn with_mapped_permutation(
self,
basepath: impl AsRef<Path>,
) -> Result<Self::WithMappedPermutation>;
}
pub struct PermutedMphf<MPHF: SwhidMphf, P: Permutation> {
mphf: MPHF,
permutation: P,
}
impl<MPHF: SwhidMphf, P: Permutation> SwhidMphf for PermutedMphf<MPHF, P> {
#[inline(always)]
fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
self.mphf
.hash_array(swhid)
.and_then(|id| self.permutation.get(id))
}
#[inline(always)]
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
self.mphf
.hash_str(swhid)
.and_then(|id| self.permutation.get(id))
}
#[inline(always)]
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
self.mphf
.hash_str_array(swhid)
.and_then(|id| self.permutation.get(id))
}
#[inline(always)]
fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
self.mphf
.hash_swhid(swhid)
.and_then(|id| self.permutation.get(id))
}
}
pub struct VecMphf {
pub swhids: Vec<SWHID>,
}
impl SwhidMphf for VecMphf {
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
swhid
.as_ref()
.try_into()
.ok()
.and_then(|swhid: SWHID| self.hash_swhid(&swhid))
}
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
String::from_utf8(swhid.to_vec())
.ok()
.and_then(|swhid| self.hash_str(swhid))
}
fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
self.swhids.iter().position(|item| item == swhid)
}
}
impl LoadableSwhidMphf for GOVMPH {
type WithMappedPermutation = PermutedMphf<Self, MappedPermutation>;
fn load(basepath: impl AsRef<Path>) -> Result<Self>
where
Self: Sized,
{
let path = suffix_path(basepath, ".cmph");
GOVMPH::load(&path).with_context(|| format!("Could not load {}", path.display()))
}
fn with_mapped_permutation(
self,
basepath: impl AsRef<Path>,
) -> Result<Self::WithMappedPermutation> {
let suffix = ".order"; let permutation = MappedPermutation::load_unchecked(&suffix_path(&basepath, suffix))?;
Ok(PermutedMphf {
mphf: self,
permutation,
})
}
}
impl SwhidMphf for GOVMPH {
#[inline(always)]
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
Some(self.get_byte_array(swhid.as_ref().as_bytes()) as usize)
}
#[inline(always)]
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
Some(self.get_byte_array(swhid) as usize)
}
}
pub struct SwhidPthash(pub PartitionedPhf<Minimal, MurmurHash2_128, DictionaryDictionary>);
impl SwhidPthash {
pub fn load(path: impl AsRef<Path>) -> Result<Self>
where
Self: Sized,
{
let path = path.as_ref();
PartitionedPhf::load(path)
.with_context(|| format!("Could not load {}", path.display()))
.map(SwhidPthash)
}
}
pub(crate) struct HashableSWHID<T: AsRef<[u8]>>(pub T);
impl<T: AsRef<[u8]>> pthash::Hashable for HashableSWHID<T> {
type Bytes<'a>
= &'a T
where
T: 'a;
fn as_bytes(&self) -> Self::Bytes<'_> {
&self.0
}
}
impl LoadableSwhidMphf for SwhidPthash {
type WithMappedPermutation = PermutedMphf<Self, MappedPermutation>;
fn load(basepath: impl AsRef<Path>) -> Result<Self>
where
Self: Sized,
{
let path = suffix_path(basepath, ".pthash");
SwhidPthash::load(path)
}
fn with_mapped_permutation(
self,
basepath: impl AsRef<Path>,
) -> Result<Self::WithMappedPermutation> {
let suffix = ".pthash.order";
let permutation = MappedPermutation::load_unchecked(&suffix_path(&basepath, suffix))?;
Ok(PermutedMphf {
mphf: self,
permutation,
})
}
}
impl SwhidMphf for SwhidPthash {
#[inline(always)]
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
Some(self.0.hash(HashableSWHID(swhid.as_ref().as_bytes())) as usize)
}
#[inline(always)]
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
Some(self.0.hash(HashableSWHID(swhid)) as usize)
}
}
pub enum DynMphf {
Pthash(SwhidPthash),
GOV(GOVMPH),
}
impl std::fmt::Debug for DynMphf {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DynMphf::Pthash(_) => write!(f, "DynMphf::Pthash(_)"),
DynMphf::GOV(_) => write!(f, "DynMphf::GOV(_)"),
}
}
}
impl From<GOVMPH> for DynMphf {
#[inline(always)]
fn from(value: GOVMPH) -> DynMphf {
DynMphf::GOV(value)
}
}
impl From<SwhidPthash> for DynMphf {
#[inline(always)]
fn from(value: SwhidPthash) -> DynMphf {
DynMphf::Pthash(value)
}
}
impl LoadableSwhidMphf for DynMphf {
type WithMappedPermutation = PermutedDynMphf;
fn load(basepath: impl AsRef<Path>) -> Result<Self> {
let basepath = basepath.as_ref();
let pthash_path = suffix_path(basepath, ".pthash");
if pthash_path.exists() {
return LoadableSwhidMphf::load(basepath)
.map(Self::Pthash)
.with_context(|| format!("Could not load {}", pthash_path.display()));
}
let gov_path = suffix_path(basepath, ".cmph");
if gov_path.exists() {
return LoadableSwhidMphf::load(basepath)
.map(Self::GOV)
.with_context(|| format!("Could not load {}", gov_path.display()));
}
bail!(
"Cannot load MPH function, neither {} nor {} exists.",
pthash_path.display(),
gov_path.display()
);
}
fn with_mapped_permutation(
self,
basepath: impl AsRef<Path>,
) -> Result<Self::WithMappedPermutation> {
match self {
Self::Pthash(mphf) => mphf
.with_mapped_permutation(basepath)
.map(PermutedDynMphf::Pthash),
Self::GOV(mphf) => mphf
.with_mapped_permutation(basepath)
.map(PermutedDynMphf::GOV),
}
}
}
impl SwhidMphf for DynMphf {
#[inline(always)]
fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_array(swhid),
Self::GOV(mphf) => mphf.hash_array(swhid),
}
}
#[inline(always)]
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_str(swhid),
Self::GOV(mphf) => mphf.hash_str(swhid),
}
}
#[inline(always)]
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_str_array(swhid),
Self::GOV(mphf) => mphf.hash_str_array(swhid),
}
}
#[inline(always)]
fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_swhid(swhid),
Self::GOV(mphf) => mphf.hash_swhid(swhid),
}
}
}
pub enum PermutedDynMphf {
Pthash(<SwhidPthash as LoadableSwhidMphf>::WithMappedPermutation),
GOV(<GOVMPH as LoadableSwhidMphf>::WithMappedPermutation),
}
impl SwhidMphf for PermutedDynMphf {
#[inline(always)]
fn hash_array(&self, swhid: &[u8; SWHID::BYTES_SIZE]) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_array(swhid),
Self::GOV(mphf) => mphf.hash_array(swhid),
}
}
#[inline(always)]
fn hash_str(&self, swhid: impl AsRef<str>) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_str(swhid),
Self::GOV(mphf) => mphf.hash_str(swhid),
}
}
#[inline(always)]
fn hash_str_array(&self, swhid: &[u8; 50]) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_str_array(swhid),
Self::GOV(mphf) => mphf.hash_str_array(swhid),
}
}
#[inline(always)]
fn hash_swhid(&self, swhid: &SWHID) -> Option<NodeId> {
match self {
Self::Pthash(mphf) => mphf.hash_swhid(swhid),
Self::GOV(mphf) => mphf.hash_swhid(swhid),
}
}
}