use std::sync::{Arc, Mutex};
use anyhow::{Context, Result};
use clap::Parser;
use dsi_progress_logger::{progress_logger, ProgressLog};
use log::info;
use rayon::prelude::*;
use swh_graph::map::{Node2SWHID, Node2Type};
// Command-line arguments, parsed by clap's derive API. Both fields are
// positional. The `///` comments on the fields below are picked up by clap as
// the per-argument help text shown by `--help`.
#[derive(Parser, Debug)]
#[command(about = "Build `.node2type.bin` from `.node2swhid.bin`. Example usage: cargo run --bin node2type -- swh/graph/example_dataset/compressed/example ", long_about = None)]
struct Args {
    /// Basename of the input graph; `.node2swhid.bin` is appended to it to find the input file
    basename: String,
    /// Basename for the output `.node2type.bin` file; defaults to the input basename when omitted
    dst_basename: Option<String>,
}
pub fn main() -> Result<()> {
let args = Args::parse();
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let node2swhid_path = format!("{}.node2swhid.bin", args.basename);
info!("loading node ID -> SWHID map from {node2swhid_path} ...");
let node2swhid = Node2SWHID::load(&node2swhid_path)
.with_context(|| format!("While loading the .node2swhid.bin file: {node2swhid_path}"))?;
let num_nodes = node2swhid.len();
let node2type_path = format!(
"{}.node2type.bin",
args.dst_basename.unwrap_or(args.basename)
);
let node2type = Node2Type::new(&node2type_path, num_nodes)
.with_context(|| format!("While creating the .node2type.bin file: {node2type_path}"))?;
let node2type = Arc::new(Mutex::new(node2type));
let mut pl = progress_logger!(
display_memory = true,
item_name = "node",
local_speed = true,
expected_updates = Some(num_nodes),
);
pl.start("iterating over node ID -> SWHID map ...");
let pl = Arc::new(Mutex::new(pl));
(0..num_nodes)
.into_par_iter()
.map(|node_id| (node_id, node2swhid.get(node_id).unwrap().node_type))
.chunks(100_000_000)
.for_each(|chunk| {
let mut node2type = node2type.lock().unwrap();
for (node_id, node_type) in &chunk {
node2type.set(*node_id, *node_type);
}
pl.lock().unwrap().update_with_count(chunk.len());
});
pl.lock().unwrap().done();
Ok(())
}