use crate::*;
use anyhow::Result;
use dsi_bitstream::{dispatch::factory::CodesReaderFactoryHelper, prelude::*};
use mmap_rs::MmapFlags;
use std::path::PathBuf;
use tempfile::Builder;
use webgraph::graphs::union_graph::UnionGraph;
use webgraph::prelude::*;
#[derive(Parser, Debug)]
#[command(name = "simplify", about = "Makes a BvGraph simple (undirected and loopless) by adding missing arcs and removing loops, optionally applying a permutation.", long_about = None)]
pub struct CliArgs {
pub src: PathBuf,
pub dst: PathBuf,
#[arg(long)]
pub transposed: Option<PathBuf>,
#[clap(flatten)]
pub num_threads: NumThreadsArg,
#[clap(flatten)]
pub memory_usage: MemoryUsageArg,
#[clap(flatten)]
pub ca: CompressArgs,
#[arg(long)]
pub permutation: Option<PathBuf>,
}
pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
create_parent_dir(&args.dst)?;
match get_endianness(&args.src)?.as_str() {
#[cfg(feature = "be_bins")]
BE::NAME => simplify::<BE>(global_args, args),
#[cfg(feature = "le_bins")]
LE::NAME => simplify::<LE>(global_args, args),
e => panic!("Unknown endianness: {}", e),
}
}
fn no_ef_warn(basepath: impl AsRef<std::path::Path>) {
log::warn!(SEQ_PROC_WARN![], basepath.as_ref().display());
}
pub fn simplify<E: Endianness>(_global_args: GlobalArgs, args: CliArgs) -> Result<()>
where
MmapHelper<u32>: CodesReaderFactoryHelper<E>,
for<'a> LoadModeCodesReader<'a, E, Mmap>: BitSeek + Clone + Send + Sync,
{
let thread_pool = crate::get_thread_pool(args.num_threads.num_threads);
let target_endianness = args.ca.endianness.clone().unwrap_or_else(|| E::NAME.into());
let dir = Builder::new().prefix("transform_simplify_").tempdir()?;
let chunk_size = args.ca.chunk_size;
let bvgraphz = args.ca.bvgraphz;
let mut builder = BvCompConfig::new(&args.dst)
.with_comp_flags(args.ca.into())
.with_tmp_dir(&dir);
if bvgraphz {
builder = builder.with_chunk_size(chunk_size);
}
match (args.permutation, args.transposed) {
(None, Some(t_path)) => {
log::info!("Transposed graph provided, using it to simplify the graph");
let has_ef_graph =
std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file());
let has_ef_t_graph =
std::fs::metadata(t_path.with_extension("ef")).is_ok_and(|x| x.is_file());
match (has_ef_graph, has_ef_t_graph) {
(true, true) => {
log::info!("Both .ef files found, using simplify split");
let graph =
webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
.endianness::<E>()
.load()?;
let num_nodes = graph.num_nodes();
let graph_t =
webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&t_path)
.endianness::<E>()
.load()?;
if graph_t.num_nodes() != num_nodes {
anyhow::bail!(
"The number of nodes in the graph and its transpose do not match! {} != {}",
num_nodes,
graph_t.num_nodes()
);
}
let sorted = NoSelfLoopsGraph(UnionGraph(graph, graph_t));
thread_pool.install(|| {
builder.par_comp_lenders_endianness(
&sorted,
sorted.num_nodes(),
&target_endianness,
)
})?;
return Ok(());
}
(true, false) => {
no_ef_warn(&t_path);
}
(false, true) => {
no_ef_warn(&args.src);
}
(false, false) => {
no_ef_warn(&args.src);
no_ef_warn(&t_path);
}
}
let seq_graph =
webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
.endianness::<E>()
.load()?;
let num_nodes = seq_graph.num_nodes();
let seq_graph_t =
webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&t_path)
.endianness::<E>()
.load()?;
if seq_graph_t.num_nodes() != num_nodes {
anyhow::bail!(
"The number of nodes in the graph and its transpose do not match! {} != {}",
num_nodes,
seq_graph_t.num_nodes()
);
}
let sorted = NoSelfLoopsGraph(UnionGraph(seq_graph, seq_graph_t));
thread_pool.install(|| {
builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
})?;
}
(Some(perm_path), None | Some(_)) => {
log::info!("Permutation provided, applying it to the graph");
let perm = JavaPermutation::mmap(perm_path, MmapFlags::RANDOM_ACCESS)?;
if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
log::info!(".ef file found, using simplify split");
let graph =
webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
.endianness::<E>()
.load()?;
let perm_graph = PermutedGraph {
graph: &graph,
perm: &perm,
};
thread_pool.install(|| {
let sorted = webgraph::transform::simplify_split(
&perm_graph,
args.memory_usage.memory_usage,
)?;
builder.par_comp_lenders_endianness(
&sorted,
sorted.num_nodes(),
&target_endianness,
)
})?;
return Ok(());
}
no_ef_warn(&args.src);
let seq_graph =
webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
.endianness::<E>()
.load()?;
let perm_graph = PermutedGraph {
graph: &seq_graph,
perm: &perm,
};
let sorted =
webgraph::transform::simplify(&perm_graph, args.memory_usage.memory_usage)?;
thread_pool.install(|| {
builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
})?;
}
(None, None) => {
log::info!(
"No permutation or transposed graph provided, computing the transpose on the fly"
);
if std::fs::metadata(args.src.with_extension("ef")).is_ok_and(|x| x.is_file()) {
log::info!(".ef file found, using simplify split");
let graph =
webgraph::graphs::bvgraph::random_access::BvGraph::with_basename(&args.src)
.endianness::<E>()
.load()?;
thread_pool.install(|| {
let sorted = webgraph::transform::simplify_split(
&graph,
args.memory_usage.memory_usage,
)?;
builder.par_comp_lenders_endianness(
&sorted,
sorted.num_nodes(),
&target_endianness,
)
})?;
return Ok(());
}
no_ef_warn(&args.src);
let seq_graph =
webgraph::graphs::bvgraph::sequential::BvGraphSeq::with_basename(&args.src)
.endianness::<E>()
.load()?;
let sorted =
webgraph::transform::simplify_sorted(seq_graph, args.memory_usage.memory_usage)?;
thread_pool.install(|| {
builder.par_comp_lenders_endianness(&sorted, sorted.num_nodes(), &target_endianness)
})?;
}
}
Ok(())
}