swh_graph/java_compat/mph/
gov.rs1use super::spooky::{spooky_short, spooky_short_rehash};
13use anyhow::Result;
14use std::fs::File;
15use std::io::BufReader;
16use std::io::Read;
17use std::path::Path;
18
19#[derive(Debug)]
39pub struct GOVMPH {
40 pub size: u64,
41 pub multiplier: u64,
42 pub global_seed: u64,
43 pub edge_offset_and_seed: Vec<u64>,
44 pub array: Vec<u64>,
45}
46
47impl GOVMPH {
48 pub fn size(&self) -> u64 {
49 self.size
50 }
51
52 pub fn get_byte_array(&self, key: &[u8]) -> u64 {
53 let signature = spooky_short(key, self.global_seed);
54 let bucket = ((((signature[0] as u128) >> 1) * (self.multiplier as u128)) >> 64) as u64;
55 let edge_offset_seed = self.edge_offset_and_seed[bucket as usize];
56 let bucket_offset = vertex_offset(edge_offset_seed);
57 let num_variables =
58 vertex_offset(self.edge_offset_and_seed[bucket as usize + 1]) - bucket_offset;
59 let e = signature_to_equation(&signature, edge_offset_seed & (!OFFSET_MASK), num_variables);
60 let eq_idx = (get_2bit_value(&self.array, e[0] + bucket_offset)
61 + get_2bit_value(&self.array, e[1] + bucket_offset)
62 + get_2bit_value(&self.array, e[2] + bucket_offset))
63 % 3;
64 let offset = count_nonzero_pairs(
65 bucket_offset,
66 bucket_offset + e[eq_idx as usize],
67 &self.array,
68 );
69 (edge_offset_seed & OFFSET_MASK) + offset
70 }
71}
72
73#[inline(always)]
74#[must_use]
75const fn count_non_zero_pairs_in_word(x: u64) -> u64 {
77 ((x | (x >> 1)) & 0x5555555555555555).count_ones() as u64
78}
79
80fn count_nonzero_pairs(start: u64, end: u64, array: &[u64]) -> u64 {
83 let mut block = start / 32;
84 let end_block = end / 32;
85 let start_offset = start % 32;
86 let end_offset = end % 32;
87
88 if block == end_block {
89 return count_non_zero_pairs_in_word(
90 (array[block as usize] & ((1 << (end_offset * 2)) - 1)) >> (start_offset * 2),
91 );
92 }
93
94 let mut pairs = 0;
95 if start_offset != 0 {
96 pairs += count_non_zero_pairs_in_word(array[block as usize] >> (start_offset * 2));
97 block += 1;
98 }
99 while block < end_block {
100 pairs += count_non_zero_pairs_in_word(array[block as usize]);
101 block += 1;
102 }
103 if end_offset != 0 {
104 pairs +=
105 count_non_zero_pairs_in_word(array[block as usize] & ((1 << (end_offset * 2)) - 1));
106 }
107 pairs
108}
109
110const OFFSET_MASK: u64 = u64::MAX >> 8;
111const C_TIMES_256: u64 = 281; #[inline(always)]
114#[must_use]
115fn signature_to_equation(signature: &[u64; 4], seed: u64, num_variables: u64) -> [u64; 3] {
116 let hash = spooky_short_rehash(signature, seed);
117 let shift = num_variables.leading_zeros();
118 let mask = (1_u64 << shift) - 1;
119 [
120 ((hash[0] & mask) * num_variables) >> shift,
121 ((hash[1] & mask) * num_variables) >> shift,
122 ((hash[2] & mask) * num_variables) >> shift,
123 ]
124}
125
126#[inline(always)]
127#[must_use]
128const fn vertex_offset(edge_offset_seed: u64) -> u64 {
129 ((edge_offset_seed & OFFSET_MASK) * C_TIMES_256) >> 8
130}
131
132#[inline(always)]
133#[must_use]
134const fn get_2bit_value(data: &[u64], mut pos: u64) -> u64 {
135 pos *= 2;
136 (data[(pos / 64) as usize] >> (pos % 64)) & 3
137}
138
139impl GOVMPH {
140 pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
143 Self::load_reader(BufReader::new(File::open(path.as_ref())?))
144 }
145
146 pub fn load_reader<F: Read>(mut file: F) -> Result<Self> {
148 macro_rules! read {
149 ($file:expr, $type:ty) => {{
150 let mut buffer: [u8; core::mem::size_of::<$type>()] =
151 [0; core::mem::size_of::<$type>()];
152 $file.read_exact(&mut buffer)?;
153 <$type>::from_le_bytes(buffer)
154 }};
155 }
156
157 macro_rules! read_array {
158 ($file:expr, $type:ty) => {{
159 let len = read!($file, u64) as usize;
161 let mut buffer: Vec<u64> = vec![0; len];
162 $file.read_exact(bytemuck::cast_slice_mut(&mut buffer))?;
164 buffer
165 }};
166 }
167 let size = read!(file, u64);
169 let multiplier = read!(file, u64);
170 let global_seed = read!(file, u64);
171 let edge_offset_and_seed = read_array!(file, u64);
172 let array = read_array!(file, u64);
173
174 Ok(Self {
175 size,
176 multiplier,
177 global_seed,
178 edge_offset_and_seed,
179 array,
180 })
181 }
182}