swh_graph/compress/
mph.rs1use std::path::PathBuf;
7
8use anyhow::{Context, Result};
9use dsi_progress_logger::{concurrent_progress_logger, ProgressLog};
10use pthash::{BuildConfiguration, PartitionedPhf, Phf};
11use rayon::prelude::*;
12
13use crate::compress::zst_dir::*;
14use crate::mph::{HashableSWHID, SwhidPthash};
15
16pub fn build_swhids_mphf(swhids_dir: PathBuf, num_nodes: usize) -> Result<SwhidPthash> {
18 let mut pass_counter = 0;
19 let iter_swhids = || {
20 pass_counter += 1;
21 let mut pl = concurrent_progress_logger!(
22 display_memory = true,
23 item_name = "SWHID",
24 local_speed = true,
25 expected_updates = Some(num_nodes),
26 );
27 pl.start(format!("Reading SWHIDs (pass #{pass_counter})"));
28 par_iter_lines_from_dir(&swhids_dir, pl).map(HashableSWHID::<Vec<u8>>)
29 };
30 let temp_dir = tempfile::tempdir().unwrap();
31
32 let mut config = BuildConfiguration::new(temp_dir.path().to_owned());
34 config.c = 5.;
35 config.alpha = 0.94;
36 config.num_partitions = num_nodes.div_ceil(10000000) as u64;
37 config.num_threads = num_cpus::get() as u64;
38
39 log::info!("Building MPH with parameters: {:?}", config);
40
41 let mut f = PartitionedPhf::new();
42 f.par_build_in_internal_memory_from_bytes(iter_swhids, &config)
43 .context("Failed to build MPH")?;
44 Ok(SwhidPthash(f))
45}