swh-graph-stdlib 13.0.0

Library of algorithms and data structures for swh-graph
Documentation
// Copyright (C) 2026  The Software Heritage developers
// See the AUTHORS file at the top-level directory of this distribution
// License: GNU General Public License version 3, or any later version
// See top-level LICENSE file for more information

use anyhow::Result;
use rayon::prelude::*;
use std::collections::HashSet;
use swh_graph::graph::*;
use swh_graph::graph_builder::GraphBuilder;
use swh_graph::labels::{Visit, VisitStatus};
use swh_graph::{SWHID, swhid};
use swh_graph_stdlib::origins::*;

/// Checks `normalize_origin_url` against a table of `(input, expected)` pairs.
///
/// The samples cover scheme removal (`https`, `HTTP`, `git`, `svn`),
/// lower-casing, and dropping a trailing `.git`.
#[test]
fn test_normalize_origin_url() {
    let cases = [
        ("https://github.com/user/repo.git", "github.com/user/repo"),
        ("HTTP://Example.COM/Repo.git", "example.com/repo"),
        ("git://example.com/repo", "example.com/repo"),
        ("svn://example.com/repo", "example.com/repo"),
    ];

    for (url, expected) in cases {
        assert_eq!(normalize_origin_url(url), expected.to_string());
    }
}

fn get_builder() -> GraphBuilder {
    let mut builder = GraphBuilder::default();
    let ori0_url = "https://github.com/user/repo.git";
    let ori0_swhid = SWHID::from_origin_url(ori0_url);
    let ori0 = builder
        .node(ori0_swhid)
        .unwrap()
        .message(ori0_url.into())
        .done();
    let ori1 = builder
        .node(swhid!(swh:1:ori:0000000000000000000000000000000000000001))
        .unwrap()
        .message(b"git://github.com/user/repo".to_vec())
        .done();
    let ori2 = builder
        .node(swhid!(swh:1:ori:0000000000000000000000000000000000000002))
        .unwrap()
        .message(b"HTTP://Example.COM/Repo.git".to_vec())
        .done();
    let ori3 = builder
        .node(swhid!(swh:1:ori:0000000000000000000000000000000000000003))
        .unwrap()
        .message(b"svn://svn.example.com/repo".to_vec())
        .done();

    // Add visits, because edge-less nodes cannot exist.
    let snp = builder
        .node(swhid!(swh:1:snp:0000000000000000000000000000000000000000))
        .unwrap()
        .done();
    let visit = Visit::new(VisitStatus::Full, 1719568024).unwrap();
    builder.ori_arc(ori0, snp, visit.status(), visit.timestamp());
    builder.ori_arc(ori1, snp, visit.status(), visit.timestamp());
    builder.ori_arc(ori2, snp, visit.status(), visit.timestamp());
    builder.ori_arc(ori3, snp, visit.status(), visit.timestamp());

    builder
}

/// Checks that `fuzzy_find_origins` pairs each query index with the node ids
/// of all origins matching that query in the fixture graph.
#[test]
fn test_fuzzy_find_origins() -> Result<()> {
    let graph = get_builder().done()?;
    let props = graph.properties();

    let queries = [
        "http://github.com/User/Repo".to_string(), // should match: ori0, ori1
        "svn+ssh://svn.example.com/repo".to_string(), // should match: ori3
    ];
    let found: HashSet<_> = fuzzy_find_origins(&graph, &queries).collect();

    // Resolve the node ids of the origins expected in the result.
    let ori0 = props.node_id(SWHID::from_origin_url(
        "https://github.com/user/repo.git",
    ))?;
    let ori1 = props.node_id(swhid!(swh:1:ori:0000000000000000000000000000000000000001))?;
    let ori3 = props.node_id(swhid!(swh:1:ori:0000000000000000000000000000000000000003))?;

    // Each entry is (index of the query, matching node id).
    let expected = HashSet::from([(0, ori0), (0, ori1), (1, ori3)]);
    assert_eq!(found, expected);

    Ok(())
}

/// Checks that two queries matching the same origins each report those
/// origins under their own query index.
#[test]
fn test_fuzzy_find_redundant_origins() -> Result<()> {
    let graph = get_builder().done()?;
    let props = graph.properties();

    let queries = [
        "http://github.com/User/Repo".to_string(), // should match: ori0, ori1
        "http://github.com/User/Repo.git".to_string(), // should match: ori0, ori1
        "svn+ssh://svn.example.com/repo".to_string(), // should match: ori3
    ];
    let found: HashSet<_> = fuzzy_find_origins(&graph, &queries).collect();

    // Resolve the node ids of the origins expected in the result.
    let ori0 = props.node_id(SWHID::from_origin_url(
        "https://github.com/user/repo.git",
    ))?;
    let ori1 = props.node_id(swhid!(swh:1:ori:0000000000000000000000000000000000000001))?;
    let ori3 = props.node_id(swhid!(swh:1:ori:0000000000000000000000000000000000000003))?;

    // Each entry is (index of the query, matching node id); the first two
    // queries both match ori0 and ori1.
    let expected = HashSet::from([
        (0, ori0),
        (0, ori1),
        (1, ori0),
        (1, ori1),
        (2, ori3),
    ]);
    assert_eq!(found, expected);

    Ok(())
}

/// Checks `fuzzy_find_origin` on the fixture graph for three cases: a URL
/// identical to a stored one, a URL differing in scheme, case and `.git`
/// suffix, and a URL with no counterpart in the graph.
#[test]
fn test_find_origin() -> Result<()> {
    let graph = get_builder().done()?;
    let props = graph.properties();

    // Precise, non-fuzzy match
    let exact_url = "https://github.com/user/repo.git";
    let exact_node = props.node_id(SWHID::from_origin_url(exact_url))?;
    assert_eq!(fuzzy_find_origin(&graph, exact_url), Some(exact_node));

    // Fuzzy match
    let fuzzy_node =
        props.node_id(swhid!(swh:1:ori:0000000000000000000000000000000000000002))?;
    assert_eq!(
        fuzzy_find_origin(&graph, "http://example.com/repo"),
        Some(fuzzy_node)
    );

    // No match
    let missing = fuzzy_find_origin(&graph, "http://not-exist.com/this/repo");
    assert_eq!(missing, None);

    Ok(())
}