// swh-graph-stdlib 13.0.0
//
// Library of algorithms and data structures for swh-graph
// Documentation
// Copyright (C) 2023-2026  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

use std::path::PathBuf;

use anyhow::{Context, Result};
use clap::Parser;
#[allow(unused_imports)] // to keep debug!() around
use log::{self, debug, info, warn};
use swh_graph::mph::DynMphf;

use swh_graph_stdlib::project_ids::compute_project_ids;

#[derive(Parser, Debug)]
#[command()]
/// Groups revisions into a notion of "projects".
///
/// A project is a set of initial revisions that are referred to by revisions.
/// The revisions which have the same project id are deemed to belong to the same network
/// of forks of a common git history, therefore pertaining to the same FOSS project.
///
/// This command produces various BV graphs in the interest of accessing the project id
/// associated with any revision efficiently. Those are stored as BV graphs inside the supplied
/// output path:
/// * `rev_to_initial_revs`: from a revision to the initial revisions that it refers to
/// * `rev_to_project_id`: from a revision to its project id
/// * `project_id_to_initial_revs`: from a project id to the set of initial revisions that it corresponds to
/// * `project_id_to_revs`: from a project id to its revisions (transpose of `rev_to_project_id`)
struct Args {
    /// Path to the graph to load
    graph_path: PathBuf,
    #[arg(long)]
    /// Path where to write the index
    output_path: PathBuf,
    #[arg(long, default_value_t = 96)]
    /// Number of partitions to use when sorting the pairs of initial and reachable revisions
    num_partitions: usize,
    #[arg(long, default_value_t = false)]
    /// Print statistics about the number of initial revisions after computing the map
    print_stats: bool,
}

pub fn main() -> Result<()> {
    let args = Args::parse();
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    info!("Loading graph...");
    let graph = swh_graph::graph::SwhBidirectionalGraph::new(args.graph_path)
        .context("Could not load graph")?
        .init_properties()
        .load_properties(|props| props.load_maps::<DynMphf>())
        .context("Could not load maps")?;
    let (stats, _) = compute_project_ids(&graph, args.num_partitions, &args.output_path)?;

    if args.print_stats {
        println!("{stats}");
    }
    Ok(())
}