#![cfg(feature = "unstable_project_ids")]
use std::collections::{HashMap, HashSet};
use std::path::Path;
use anyhow::{Context, Result};
use dsi_bitstream::traits::BigEndian;
use epserde::deser::Deserialize;
use lender::Lender;
#[allow(unused_imports)] use log::{self, debug, info, warn};
use sux::array::partial_array::{PartialArray, SparseIndex};
use swh_graph::graph_builder::GraphBuilder;
use swh_graph::swhid;
use webgraph::prelude::{BvGraphSeq, MemoryFlags};
use webgraph::traits::SequentialLabeling;
use swh_graph_stdlib::project_ids::compute_project_ids;
/// Shorthand for a [`PartialArray`] whose index is a [`SparseIndex`] over a boxed `usize` slice.
type SparsePartialArray<T> = PartialArray<T, SparseIndex<Box<[usize]>>>;
/// Memory-maps the serialized `SparsePartialArray<usize>` stored at `path` and copies
/// every populated position into a `position -> value` map.
///
/// # Errors
///
/// Fails with a message naming `path` when the file cannot be mapped or deserialized.
fn load_partialarray_to_map(path: &Path) -> Result<HashMap<usize, usize>> {
    // SAFETY: NOTE(review) presumably sound as long as `path` holds a valid serialized
    // `SparsePartialArray<usize>` that nobody modifies while it is mapped; confirm
    // against the contract of epserde's `mmap`.
    let mapped = unsafe { SparsePartialArray::<usize>::mmap(path, Default::default()) }
        .with_context(|| format!("Could not mmap or deserialize {}", path.display()))?;
    let array = mapped.uncase();

    let mut node_to_project: HashMap<usize, usize> = HashMap::new();
    for node in 0..array.len() {
        // Positions for which `get` returns `None` are skipped.
        if let Some(project) = array.get(node) {
            node_to_project.insert(node, *project);
        }
    }
    Ok(node_to_project)
}
/// Loads the big-endian BvGraph whose basename is `path` and returns all of its arcs
/// as a set of `(node, successor)` pairs.
///
/// # Errors
///
/// Propagates any error returned while loading the graph.
fn load_bvgraph_to_set(path: &Path) -> anyhow::Result<HashSet<(usize, usize)>> {
    let graph = BvGraphSeq::with_basename(path)
        .endianness::<BigEndian>()
        .flags(MemoryFlags::default())
        .load()?;

    let mut arcs: HashSet<(usize, usize)> = HashSet::new();
    let mut nodes = graph.iter();
    // Walk the nodes in sequence, recording one pair per successor of each node.
    while let Some((src, successors)) = nodes.next() {
        arcs.extend(successors.into_iter().map(move |dst| (src, dst)));
    }
    Ok(arcs)
}
/// Runs `compute_project_ids` on a graph made of a directory chain plus two
/// separate groups of revisions, and checks every output it produces.
///
/// Arcs between revisions: `b -> a`, `b -> c`, `e -> d`, `f -> d`; so `a`, `c` and `d`
/// are the revisions without outgoing arcs. The directories `d1..d4` are expected not
/// to show up in any of the outputs.
#[test]
fn multiple_connected_components() -> anyhow::Result<()> {
    let mut builder = GraphBuilder::default();

    // A chain of directories, none of which appears in the expected results below.
    let d1 = builder
        .node(swhid!(swh:1:dir:0000000000000000000000000000000000000000))
        .unwrap()
        .done();
    let d2 = builder
        .node(swhid!(swh:1:dir:0000000000000000000000000000000000000001))
        .unwrap()
        .done();
    let d3 = builder
        .node(swhid!(swh:1:dir:0000000000000000000000000000000000000002))
        .unwrap()
        .done();
    let d4 = builder
        .node(swhid!(swh:1:dir:0000000000000000000000000000000000000003))
        .unwrap()
        .done();
    builder.arc(d1, d2);
    builder.arc(d2, d3);
    builder.arc(d3, d4);

    // Revisions.
    let a = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000010))
        .unwrap()
        .done();
    let b = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000020))
        .unwrap()
        .done();
    let c = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000030))
        .unwrap()
        .done();
    let d = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000040))
        .unwrap()
        .done();
    let e = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000050))
        .unwrap()
        .done();
    let f = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000060))
        .unwrap()
        .done();
    builder.arc(b, a);
    builder.arc(b, c);
    builder.arc(e, d);
    builder.arc(f, d);
    let graph = builder.done().unwrap();

    // Hold on to the `TempDir` guard (rather than calling `into_path()`) so the
    // directory is deleted when the test ends, whether it passes or fails.
    let output_dir = tempfile::tempdir()?;
    let bvgraph_path = output_dir.path().join("graph");

    // NOTE(review): the second argument is presumably the number of histogram buckets
    // in the report (the expected report below lists 1 through 5+); confirm.
    let (report, project_ids) = compute_project_ids(&graph, 5, &bvgraph_path)?;

    // Revision -> roots.
    let rev_to_roots = load_bvgraph_to_set(&bvgraph_path.join("rev_to_roots"))?;
    let expected_rev_to_roots: HashSet<(usize, usize)> =
        HashSet::from([(a, a), (b, a), (b, c), (c, c), (d, d), (e, d), (f, d)]);
    assert_eq!(rev_to_roots, expected_rev_to_roots);

    // Revision -> project id.
    let rev_to_project_id = load_partialarray_to_map(&bvgraph_path.join("rev_to_project_id"))?;
    let expected_rev_to_project_id: HashMap<usize, usize> =
        HashMap::from([(a, 0), (b, 1), (c, 2), (d, 3), (e, 3), (f, 3)]);
    assert_eq!(rev_to_project_id, expected_rev_to_project_id);

    // Project id -> roots.
    let project_id_to_roots = load_bvgraph_to_set(&bvgraph_path.join("project_id_to_roots"))?;
    let expected_project_id_to_roots: HashSet<(usize, usize)> =
        HashSet::from([(0, a), (1, a), (1, c), (2, c), (3, d)]);
    assert_eq!(project_id_to_roots, expected_project_id_to_roots);

    // A trailing `\` swallows the newline and the next line's leading whitespace, so
    // the indentation of this literal is not part of the compared string.
    assert_eq!(
        report.to_string(),
        "\
        Statistics about root revisions\n\
        ----------------------------------\n\
        Revisions: 6\n\
        Initial revisions: 3 (50.00%)\n\
        Initial revisions per reached revision:\n \
        1: 5 (83.33%)\n \
        2: 1 (16.67%)\n \
        3: 0 (0.00%)\n \
        4: 0 (0.00%)\n \
        5+: 0 (0.00%)\n\
        Maximum number of root revisions reaching a revision: 2\n"
    );

    // Set of roots -> project id, as returned directly by `compute_project_ids`.
    let expected_project_ids =
        HashMap::from([(vec![a], 0), (vec![a, c], 1), (vec![c], 2), (vec![d], 3)]);
    assert_eq!(project_ids, expected_project_ids);
    Ok(())
}
/// Runs `compute_project_ids` on a diamond of revisions (`d -> b -> a` and
/// `d -> c -> a`) and checks that every revision maps to the single root `a` and to
/// project id 0.
#[test]
fn diamond_graph() -> anyhow::Result<()> {
    let mut builder = GraphBuilder::default();
    let a = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000010))
        .unwrap()
        .done();
    let b = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000020))
        .unwrap()
        .done();
    let c = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000030))
        .unwrap()
        .done();
    let d = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000040))
        .unwrap()
        .done();
    builder.arc(b, a);
    builder.arc(c, a);
    builder.arc(d, b);
    builder.arc(d, c);
    let graph = builder.done().unwrap();

    // Hold on to the `TempDir` guard (rather than calling `into_path()`) so the
    // directory is deleted when the test ends, whether it passes or fails.
    let output_dir = tempfile::tempdir()?;
    let bvgraph_path = output_dir.path().join("graph");
    let (_report, project_ids) = compute_project_ids(&graph, 5, &bvgraph_path)?;

    // Revision -> roots: `a` is the only root, reached from all four revisions.
    let rev_to_roots = load_bvgraph_to_set(&bvgraph_path.join("rev_to_roots"))?;
    let expected_rev_to_roots: HashSet<(usize, usize)> =
        HashSet::from([(a, a), (b, a), (c, a), (d, a)]);
    assert_eq!(rev_to_roots, expected_rev_to_roots);

    // Revision -> project id: everything belongs to project 0.
    let rev_to_project_id = load_partialarray_to_map(&bvgraph_path.join("rev_to_project_id"))?;
    let expected_rev_to_project_id: HashMap<usize, usize> =
        HashMap::from([(a, 0), (b, 0), (c, 0), (d, 0)]);
    assert_eq!(rev_to_project_id, expected_rev_to_project_id);

    // Set of roots -> project id, as returned directly by `compute_project_ids`.
    let expected_project_ids = HashMap::from([(vec![a], 0)]);
    assert_eq!(project_ids, expected_project_ids);
    Ok(())
}
/// Runs `compute_project_ids` on four revisions without outgoing arcs (`a`, `b`, `c`,
/// `d`) plus two merge revisions: `merge_abc` points at `a`, `b`, `c` and
/// `merge_abcd` points at all four. Checks the revision-to-roots output and the
/// returned mapping from root sets to project ids.
#[test]
fn merge_many_unrelated_histories() -> anyhow::Result<()> {
    let mut builder = GraphBuilder::default();
    let a = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000010))
        .unwrap()
        .done();
    let b = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000020))
        .unwrap()
        .done();
    let c = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000030))
        .unwrap()
        .done();
    let d = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000040))
        .unwrap()
        .done();
    let merge_abc = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000050))
        .unwrap()
        .done();
    let merge_abcd = builder
        .node(swhid!(swh:1:rev:0000000000000000000000000000000000000060))
        .unwrap()
        .done();
    builder.arc(merge_abc, a);
    builder.arc(merge_abc, b);
    builder.arc(merge_abc, c);
    builder.arc(merge_abcd, a);
    builder.arc(merge_abcd, b);
    builder.arc(merge_abcd, c);
    builder.arc(merge_abcd, d);
    let graph = builder.done().unwrap();

    // Hold on to the `TempDir` guard (rather than calling `into_path()`) so the
    // directory is deleted when the test ends, whether it passes or fails.
    let output_dir = tempfile::tempdir()?;
    let bvgraph_path = output_dir.path().join("graph");
    let (_report, project_ids) = compute_project_ids(&graph, 5, &bvgraph_path)?;

    // Revision -> roots: each root maps to itself, each merge to all roots it points at.
    let rev_to_roots = load_bvgraph_to_set(&bvgraph_path.join("rev_to_roots"))?;
    let expected_rev_to_roots: HashSet<(usize, usize)> = HashSet::from([
        (a, a),
        (b, b),
        (c, c),
        (d, d),
        (merge_abc, a),
        (merge_abc, b),
        (merge_abc, c),
        (merge_abcd, a),
        (merge_abcd, b),
        (merge_abcd, c),
        (merge_abcd, d),
    ]);
    assert_eq!(rev_to_roots, expected_rev_to_roots);

    // Set of roots -> project id: one project per distinct set of roots.
    let expected_project_ids = HashMap::from([
        (vec![a], 0),
        (vec![b], 1),
        (vec![c], 2),
        (vec![d], 3),
        (vec![a, b, c], 4),
        (vec![a, b, c, d], 5),
    ]);
    assert_eq!(project_ids, expected_project_ids);
    Ok(())
}