use std::collections::HashSet;
use std::path::PathBuf;
use anyhow::{bail, ensure, Context, Result};
use clap::Parser;
use dsi_progress_logger::{concurrent_progress_logger, ProgressLog};
use rayon::prelude::*;
use swh_graph::graph::*;
use swh_graph::mph::DynMphf;
use swh_graph::utils::shuffle::par_iter_shuffled_range;
use swh_graph::views::Transposed;
use swh_graph::NodeType;
#[derive(Parser, Debug)]
struct Args {
graph1: PathBuf,
graph2: PathBuf,
#[arg(long)]
skip_swhids: bool,
#[arg(long)]
skip_successors: bool,
#[arg(long)]
skip_arc_labels: bool,
#[arg(long)]
skip_properties: bool,
#[arg(long)]
allow_missing_duplicate_origin_arc_labels: bool,
#[arg(long)]
transposed: bool,
}
pub fn main() -> Result<()> {
let args = Args::parse();
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let graph1 = SwhBidirectionalGraph::new(&args.graph1)
.with_context(|| format!("Could not load graph {}", args.graph1.display()))?
.load_all_properties::<DynMphf>()
.with_context(|| {
format!(
"Could not load properties from graph {}",
args.graph1.display()
)
})?
.load_labels()
.with_context(|| format!("Could not load labels from graph {}", args.graph1.display()))?;
let graph2 = SwhBidirectionalGraph::new(&args.graph2)
.with_context(|| format!("Could not load graph {}", args.graph2.display()))?
.load_all_properties::<DynMphf>()
.with_context(|| {
format!(
"Could not load properties from graph {}",
args.graph2.display()
)
})?
.load_labels()
.with_context(|| format!("Could not load labels from graph {}", args.graph2.display()))?;
ensure!(
graph1.num_nodes() == graph2.num_nodes(),
"{} has {} nodes, {} has {} nodes",
args.graph1.display(),
graph1.num_nodes(),
args.graph2.display(),
graph2.num_nodes()
);
if args.transposed {
let graph1 = Transposed(graph1);
let graph2 = Transposed(graph2);
if !args.skip_swhids {
check_swhids(&graph1, &graph2)?;
}
if !args.skip_successors {
check_successors(&graph1, &graph2)?;
}
if !args.skip_arc_labels {
check_labels(
&graph1,
&graph2,
args.allow_missing_duplicate_origin_arc_labels,
)?;
}
if !args.skip_properties {
check_properties(&graph1, &graph2)?;
}
} else {
if !args.skip_swhids {
check_swhids(&graph1, &graph2)?;
}
if !args.skip_successors {
check_successors(&graph1, &graph2)?;
}
if !args.skip_arc_labels {
check_labels(
&graph1,
&graph2,
args.allow_missing_duplicate_origin_arc_labels,
)?;
}
if !args.skip_properties {
check_properties(&graph1, &graph2)?;
}
}
Ok(())
}
fn check_swhids<G>(graph1: &G, graph2: &G) -> Result<()>
where
G: SwhGraph + SwhGraphWithProperties + Sync,
<G as SwhGraphWithProperties>::Maps: swh_graph::properties::Maps,
{
let num_nodes = graph2.num_nodes();
let mut pl = concurrent_progress_logger!(
display_memory = true,
local_speed = true,
item_name = "node",
expected_updates = Some(num_nodes),
);
pl.start("Checking SWHIDs...");
(0..num_nodes).into_par_iter().try_for_each_with(
pl.clone(),
|thread_pl, node| -> Result<()> {
if graph1.properties().swhid(node) != graph1.properties().swhid(node) {
log::error!(
"node {} is {} in {} and {} in {}",
node,
graph1.properties().swhid(node),
graph1.path().display(),
graph2.properties().swhid(node),
graph2.path().display()
);
bail!("SWHID mismatch")
}
thread_pl.light_update();
Ok(())
},
)?;
pl.done();
Ok(())
}
fn check_successors<G>(graph1: &G, graph2: &G) -> Result<()>
where
G: SwhGraph + SwhForwardGraph + SwhGraphWithProperties + Sync,
<G as SwhGraphWithProperties>::Maps: swh_graph::properties::Maps,
{
let num_nodes = graph2.num_nodes();
let mut pl = concurrent_progress_logger!(
display_memory = true,
local_speed = true,
item_name = "node",
expected_updates = Some(num_nodes),
);
pl.start("Checking successors...");
par_iter_shuffled_range(0..num_nodes).try_for_each_with(
pl.clone(),
|thread_pl, node| -> Result<()> {
let successors1: Vec<_> = graph1.successors(node).into_iter().collect();
let successors2: Vec<_> = graph2.successors(node).into_iter().collect();
if successors1 != successors2 {
log::error!(
"node {} has successors {} in {} and {} in {}",
node,
successors1
.into_iter()
.map(|succ| graph1.properties().swhid(succ).to_string())
.collect::<Vec<_>>()
.join(", "),
graph1.path().display(),
successors2
.into_iter()
.map(|succ| graph2.properties().swhid(succ).to_string())
.collect::<Vec<_>>()
.join(", "),
graph2.path().display()
);
bail!("Successors mismatch")
}
thread_pl.light_update();
Ok(())
},
)?;
pl.done();
Ok(())
}
fn check_labels<G>(
graph1: &G,
graph2: &G,
allow_missing_duplicate_origin_arc_labels: bool,
) -> Result<()>
where
G: SwhGraph + SwhLabeledForwardGraph + SwhGraphWithProperties + Sync,
<G as SwhGraphWithProperties>::Maps: swh_graph::properties::Maps,
{
assert_eq!(graph1.is_transposed(), graph2.is_transposed());
let num_nodes = graph2.num_nodes();
let mut pl = concurrent_progress_logger!(
display_memory = true,
local_speed = true,
item_name = "node",
expected_updates = Some(num_nodes),
);
pl.start("Checking arc labels...");
par_iter_shuffled_range(0..num_nodes).try_for_each_with(
pl.clone(),
|thread_pl, node| -> Result<()> {
for ((succ1, labels1), (succ2, labels2)) in graph1
.labeled_successors(node)
.zip(graph2.labeled_successors(node))
{
ensure!(succ1 == succ2, "Successors mismatch");
let labels1: Vec<_> = labels1.collect();
let labels2: Vec<_> = labels2.collect();
if labels1 != labels2 {
let is_ori_to_snp_arc = if graph1.is_transposed() {
graph1.properties().node_type(succ1) == NodeType::Origin
} else {
graph1.properties().node_type(node) == NodeType::Origin
};
if allow_missing_duplicate_origin_arc_labels && is_ori_to_snp_arc && labels1.iter().copied().collect::<HashSet<_>>() == labels2.iter().copied().collect::<HashSet<_>>() {
log::warn!(
"arc {} ({}) -> {} ({}) has same set of labels, but different counts ({} in {} and {} in {})",
graph1.properties().swhid(node),
node,
graph1.properties().swhid(succ1),
succ1,
labels1.len(),
graph1.path().display(),
labels2.len(),
graph2.path().display()
);
} else {
log::error!(
"arc {} ({}) -> {} ({}) has labels {:?} in {} and {:?} in {}",
graph1.properties().swhid(node),
node,
graph1.properties().swhid(succ1),
succ1,
labels1,
graph1.path().display(),
labels2,
graph2.path().display(),
);
bail!("Labels mismatch")
}
}
}
thread_pl.light_update();
Ok(())
})?;
pl.done();
Ok(())
}
fn check_properties<G>(graph1: &G, graph2: &G) -> Result<()>
where
G: SwhGraph + SwhForwardGraph + SwhGraphWithProperties + Sync,
<G as SwhGraphWithProperties>::Contents: swh_graph::properties::Contents,
<G as SwhGraphWithProperties>::Maps: swh_graph::properties::Maps,
<G as SwhGraphWithProperties>::Persons: swh_graph::properties::Persons,
<G as SwhGraphWithProperties>::Strings: swh_graph::properties::Strings,
<G as SwhGraphWithProperties>::Timestamps: swh_graph::properties::Timestamps,
{
let num_nodes = graph1.num_nodes();
macro_rules! check_property {
($property:ident) => {{
let mut pl = concurrent_progress_logger!(
display_memory = true,
local_speed = true,
item_name = "node",
expected_updates = Some(num_nodes),
);
pl.start(&format!("Checking {}...", stringify!($property)));
(0..num_nodes).into_par_iter().try_for_each_with(
pl.clone(),
|thread_pl, node| -> Result<()> {
if graph1.properties().$property(node) != graph2.properties().$property(node) {
log::error!(
"node {} ({}) has {} {:?} in {} and {:?} in {}",
node,
graph1.properties().swhid(node),
stringify!($property),
graph1.properties().$property(node),
graph1.path().display(),
graph2.properties().$property(node),
graph2.path().display()
);
}
thread_pl.light_update();
Ok(())
},
)?;
pl.done();
}};
}
check_property!(author_timestamp);
check_property!(author_timestamp_offset);
check_property!(committer_timestamp);
check_property!(committer_timestamp_offset);
check_property!(author_id);
check_property!(committer_id);
check_property!(content_length);
check_property!(is_skipped_content);
check_property!(message);
check_property!(tag_name);
Ok(())
}