swh_graph/
graph.rs

1// Copyright (C) 2023-2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Structures to manipulate the Software Heritage graph
7//!
8//! In order to load only what is necessary, these structures are initially created
9//! by calling [`SwhUnidirectionalGraph::new`] or [`SwhBidirectionalGraph::new`], then calling methods
10//! on them to progressively load additional data (`load_properties`, `load_all_properties`,
11//! `load_labels`)
12
13#![allow(clippy::type_complexity)]
14
15use std::borrow::Borrow;
16use std::collections::HashMap;
17use std::iter::Iterator;
18use std::path::{Path, PathBuf};
19
20use anyhow::{anyhow, bail, Context, Result};
21use dsi_progress_logger::{ConcurrentProgressLog, ProgressLog};
22use rayon::prelude::*;
23use webgraph::graphs::vec_graph::LabeledVecGraph;
24use webgraph::prelude::*;
25
26use crate::arc_iterators::{
27    DelabelingIterator, LabelTypingSuccessorIterator, LabeledArcIterator, LabeledSuccessorIterator,
28};
29// Useful trait to import as part of `use swh_graph::graph::*;`
30pub use crate::arc_iterators::IntoFlattenedLabeledArcsIterator;
31use crate::labeling::SwhLabeling;
32use crate::labels::{EdgeLabel, UntypedEdgeLabel};
33use crate::mph::LoadableSwhidMphf;
34use crate::properties;
35pub use crate::underlying_graph::DefaultUnderlyingGraph;
36use crate::utils::shuffle::par_iter_shuffled_range;
37use crate::utils::suffix_path;
38use crate::NodeType;
39
40/// Alias for [`usize`], which may become a newtype in a future version.
41pub type NodeId = usize;
42
43/// Supertrait of [`RandomAccessLabeling`] with methods to act like a [`RandomAccessGraph`].
44///
45/// If `Self` implements [`RandomAccessGraph`], then this is implemented as no-ops.
46/// Otherwise, it "unpeels" layers of zipping until it reaches the graph at the bottom:
47///
48/// - If `Self` is `Zip<L, R>`, this defers to `L` (aka. `Left<Zip<L, R>`).
49/// - If `Self` is `VecGraph` or `LabeledVecGraph<_>, this does the equivalent of deferring to `VecGraph`
50///   (through [`DelabelingIterator`] because it cannot create a new `VecGraph` without copying)
51pub trait UnderlyingGraph: RandomAccessLabeling {
52    type UnlabeledSuccessors<'succ>: IntoIterator<Item = NodeId>
53    where
54        Self: 'succ;
55
56    /// Workaround for some implementations of `<Self as RandomAccessLabeling>::num_arcs`
57    /// being missing
58    ///
59    /// `Zip::num_arcs` runs `assert_eq!(self.0.num_arcs(), self.1.num_arcs());`
60    /// but `BitStreamLabeling::num_arcs` always panics as of
61    /// f460742fe0f776df2248a5f09a3425b81eb70b07, so we can't use that.
62    fn num_arcs(&self) -> u64;
63    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
64    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_>;
65}
66
67impl<F: RandomAccessDecoderFactory> UnderlyingGraph for BvGraph<F> {
68    type UnlabeledSuccessors<'succ>
69        = <Self as RandomAccessLabeling>::Labels<'succ>
70    where
71        Self: 'succ;
72
73    fn num_arcs(&self) -> u64 {
74        <Self as RandomAccessLabeling>::num_arcs(self)
75    }
76    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
77        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
78    }
79    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
80        <Self as RandomAccessGraph>::successors(self, node_id)
81    }
82}
83
84impl<G: UnderlyingGraph, L: RandomAccessLabeling> UnderlyingGraph for Zip<G, L> {
85    type UnlabeledSuccessors<'succ>
86        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
87    where
88        Self: 'succ;
89
90    fn num_arcs(&self) -> u64 {
91        <G as UnderlyingGraph>::num_arcs(&self.0)
92    }
93    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
94        self.0.has_arc(src_node_id, dst_node_id)
95    }
96    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
97        self.0.unlabeled_successors(node_id)
98    }
99}
100
101impl UnderlyingGraph for VecGraph {
102    type UnlabeledSuccessors<'succ>
103        = <Self as RandomAccessLabeling>::Labels<'succ>
104    where
105        Self: 'succ;
106
107    fn num_arcs(&self) -> u64 {
108        <Self as RandomAccessLabeling>::num_arcs(self)
109    }
110    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
111        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
112    }
113    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
114        <Self as RandomAccessGraph>::successors(self, node_id)
115    }
116}
117
118impl<L: Clone> UnderlyingGraph for LabeledVecGraph<L> {
119    type UnlabeledSuccessors<'succ>
120        = DelabelingIterator<<Self as RandomAccessLabeling>::Labels<'succ>>
121    where
122        Self: 'succ;
123
124    fn num_arcs(&self) -> u64 {
125        <Self as RandomAccessLabeling>::num_arcs(self)
126    }
127    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
128        for succ in self.unlabeled_successors(src_node_id) {
129            if succ == dst_node_id {
130                return true;
131            }
132        }
133        false
134    }
135    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
136        DelabelingIterator {
137            successors: <Self as RandomAccessLabeling>::labels(self, node_id),
138        }
139    }
140}
141
142pub trait SwhGraph {
143    /// Return the base path of the graph
144    fn path(&self) -> &Path;
145
146    /// Returns whether the graph is in the `ori->snp->rel,rev->dir->cnt` direction
147    /// (with a few `dir->rev` arcs)
148    fn is_transposed(&self) -> bool;
149
150    /// Return the largest node id in the graph plus one.
151    ///
152    /// For graphs directly loaded from disk this is the actual number of nodes,
153    /// but it is an overestimate for some graphs (eg. [`Subgraph`](crate::views::Subgraph)).
154    ///
155    /// Use [`actual_num_nodes`](Self::actual_num_nodes) if you need the exact number of
156    /// nodes in the graph.
157    fn num_nodes(&self) -> usize;
158
159    /// Returns whether the given node id exists in the graph
160    ///
161    /// This is usually true iff `node_id < self.num_nodes()`, but may be false
162    /// when using a filtered view such as [`Subgraph`](crate::views::Subgraph).
163    fn has_node(&self, node_id: NodeId) -> bool {
164        node_id < self.num_nodes()
165    }
166
167    /// Return the number of arcs in the graph.
168    fn num_arcs(&self) -> u64;
169
170    /// Returns the number of nodes of each type, if known.
171    fn num_nodes_by_type(&self) -> Result<HashMap<NodeType, usize>> {
172        let path = self.path().with_extension("nodes.stats.txt");
173        std::fs::read_to_string(&path)
174            .with_context(|| format!("Could not read {}", path.display()))?
175            .lines()
176            .map(|line| {
177                let (type_, count) = line
178                    .split_once(' ')
179                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
180                let type_: NodeType = type_
181                    .parse()
182                    .map_err(|_| anyhow!("Unknown node type in {}: {}", path.display(), type_))?;
183                let count = count
184                    .parse()
185                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
186                Ok((type_, count))
187            })
188            .collect()
189    }
190    /// Returns the number of nodes in the graph, if known.
191    fn actual_num_nodes(&self) -> Result<usize> {
192        Ok(self.num_nodes_by_type()?.values().sum())
193    }
194    /// Returns the number of arcs of each type, if known.
195    fn num_arcs_by_type(&self) -> Result<HashMap<(NodeType, NodeType), usize>> {
196        let path = self.path().with_extension("edges.stats.txt");
197        std::fs::read_to_string(&path)
198            .with_context(|| format!("Could not read {}", path.display()))?
199            .lines()
200            .map(|line| {
201                let (type_, count) = line
202                    .split_once(' ')
203                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
204                let (src_type, dst_type) = type_
205                    .split_once(':')
206                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
207                let src_type: NodeType = src_type.parse().map_err(|_| {
208                    anyhow!("Unknown node type in {}: {}", path.display(), src_type)
209                })?;
210                let dst_type: NodeType = dst_type.parse().map_err(|_| {
211                    anyhow!("Unknown node type in {}: {}", path.display(), dst_type)
212                })?;
213                let count = count
214                    .parse()
215                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
216                Ok(((src_type, dst_type), count))
217            })
218            .collect()
219    }
220
221    /// Return whether there is an arc going from `src_node_id` to `dst_node_id`.
222    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
223
224    /// Returns an iterator on all the nodes
225    ///
226    /// Order is not guaranteed.
227    ///
228    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
229    /// even those that are filtered out.
230    fn iter_nodes<'a>(
231        &'a self,
232        mut pl: impl ProgressLog + 'a,
233    ) -> impl Iterator<Item = NodeId> + 'a {
234        (0..self.num_nodes())
235            .inspect(move |_| pl.light_update())
236            .filter(|&node_id| self.has_node(node_id))
237    }
238    /// Returns a parallel iterator on all the nodes
239    ///
240    /// Order is not guaranteed.
241    ///
242    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
243    /// even those that are filtered out.
244    fn par_iter_nodes<'a>(
245        &'a self,
246        pl: impl ConcurrentProgressLog + 'a,
247    ) -> impl ParallelIterator<Item = NodeId> + 'a
248    where
249        Self: Sync,
250    {
251        par_iter_shuffled_range(0..self.num_nodes())
252            .map_with(pl, move |pl, node_id| {
253                pl.update();
254                node_id
255            })
256            .filter(|&node_id| self.has_node(node_id))
257    }
258}
259
260pub trait SwhForwardGraph: SwhGraph {
261    type Successors<'succ>: IntoIterator<Item = usize>
262    where
263        Self: 'succ;
264
265    /// Return an [`IntoIterator`] over the successors of a node.
266    fn successors(&self, node_id: NodeId) -> Self::Successors<'_>;
267    /// Return the number of successors of a node.
268    fn outdegree(&self, node_id: NodeId) -> usize;
269}
270
271#[diagnostic::on_unimplemented(
272    label = "does not have forward labels loaded",
273    note = "Use `let graph = graph.load_labels()` to load them"
274)]
275pub trait SwhLabeledForwardGraph: SwhForwardGraph {
276    type LabeledArcs<'arc>: IntoIterator<Item = UntypedEdgeLabel>
277    where
278        Self: 'arc;
279    type LabeledSuccessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
280        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
281    where
282        Self: 'node;
283
284    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
285    /// of each arc
286    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_>;
287
288    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
289    /// of each arc
290    fn labeled_successors(
291        &self,
292        node_id: NodeId,
293    ) -> impl Iterator<Item = (usize, impl Iterator<Item = EdgeLabel>)>
294           + IntoFlattenedLabeledArcsIterator<EdgeLabel>
295    where
296        Self: SwhGraphWithProperties + Sized,
297        <Self as SwhGraphWithProperties>::Maps: crate::properties::Maps,
298    {
299        LabelTypingSuccessorIterator {
300            graph: self,
301            is_transposed: false,
302            src: node_id,
303            successors: self.untyped_labeled_successors(node_id).into_iter(),
304        }
305    }
306}
307
308#[diagnostic::on_unimplemented(
309    label = "does not have backward arcs loaded",
310    note = "Use SwhBidirectionalGraph instead of SwhUnidirectionalGraph if you don't already"
311)]
312pub trait SwhBackwardGraph: SwhGraph {
313    type Predecessors<'succ>: IntoIterator<Item = usize>
314    where
315        Self: 'succ;
316
317    /// Return an [`IntoIterator`] over the predecessors of a node.
318    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_>;
319    /// Return the number of predecessors of a node.
320    fn indegree(&self, node_id: NodeId) -> usize;
321}
322
323#[diagnostic::on_unimplemented(
324    label = "does not have backward labels loaded",
325    note = "Use SwhBidirectionalGraph instead of SwhUnidirectionalGraph if you don't already",
326    note = "and `let graph = graph.load_labels()`"
327)]
328pub trait SwhLabeledBackwardGraph: SwhBackwardGraph {
329    type LabeledArcs<'arc>: IntoIterator<Item = UntypedEdgeLabel>
330    where
331        Self: 'arc;
332    type LabeledPredecessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
333        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
334    where
335        Self: 'node;
336
337    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
338    /// of each arc
339    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_>;
340
341    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
342    /// of each arc
343    fn labeled_predecessors(
344        &self,
345        node_id: NodeId,
346    ) -> impl Iterator<Item = (usize, impl Iterator<Item = crate::labels::EdgeLabel>)>
347    where
348        Self: SwhGraphWithProperties + Sized,
349        <Self as SwhGraphWithProperties>::Maps: crate::properties::Maps,
350    {
351        LabelTypingSuccessorIterator {
352            graph: self,
353            is_transposed: true,
354            src: node_id,
355            successors: self.untyped_labeled_predecessors(node_id).into_iter(),
356        }
357    }
358}
359
360pub trait SwhGraphWithProperties: SwhGraph {
361    type Maps: properties::MaybeMaps;
362    type Timestamps: properties::MaybeTimestamps;
363    type Persons: properties::MaybePersons;
364    type Contents: properties::MaybeContents;
365    type Strings: properties::MaybeStrings;
366    type LabelNames: properties::MaybeLabelNames;
367
368    fn properties(
369        &self,
370    ) -> &properties::SwhGraphProperties<
371        Self::Maps,
372        Self::Timestamps,
373        Self::Persons,
374        Self::Contents,
375        Self::Strings,
376        Self::LabelNames,
377    >;
378}
379
380/// Alias for structures representing a graph with all arcs, arc labels, and node properties
381/// loaded.
382pub trait SwhFullGraph:
383    SwhLabeledForwardGraph
384    + SwhLabeledBackwardGraph
385    + SwhGraphWithProperties<
386        Maps: crate::properties::Maps,
387        Timestamps: crate::properties::Timestamps,
388        Persons: crate::properties::Persons,
389        Contents: crate::properties::Contents,
390        Strings: crate::properties::Strings,
391        LabelNames: crate::properties::LabelNames,
392    >
393{
394}
395
396impl<
397        G: SwhLabeledForwardGraph
398            + SwhLabeledBackwardGraph
399            + SwhGraphWithProperties<
400                Maps: crate::properties::Maps,
401                Timestamps: crate::properties::Timestamps,
402                Persons: crate::properties::Persons,
403                Contents: crate::properties::Contents,
404                Strings: crate::properties::Strings,
405                LabelNames: crate::properties::LabelNames,
406            >,
407    > SwhFullGraph for G
408{
409}
410
411/// Class representing the compressed Software Heritage graph in a single direction.
412///
413/// Type parameters:
414///
415/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
416///   [`load_properties`](SwhUnidirectionalGraph::load_properties) and
417///   [`load_all_properties`](SwhUnidirectionalGraph::load_all_properties)
418/// * G is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
419///   [`load_labels`](SwhUnidirectionalGraph::load_labels)
420pub struct SwhUnidirectionalGraph<P, G: UnderlyingGraph = DefaultUnderlyingGraph> {
421    basepath: PathBuf,
422    graph: G,
423    properties: P,
424}
425
426impl SwhUnidirectionalGraph<()> {
427    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
428        let basepath = basepath.as_ref().to_owned();
429        let graph = DefaultUnderlyingGraph::new(&basepath)?;
430        Ok(Self::from_underlying_graph(basepath, graph))
431    }
432}
433
434impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
435    pub fn from_underlying_graph(basepath: PathBuf, graph: G) -> Self {
436        SwhUnidirectionalGraph {
437            basepath,
438            graph,
439            properties: (),
440        }
441    }
442}
443
444impl<P, G: UnderlyingGraph> SwhGraph for SwhUnidirectionalGraph<P, G> {
445    fn path(&self) -> &Path {
446        self.basepath.as_path()
447    }
448
449    fn is_transposed(&self) -> bool {
450        // Technically, users can load the 'graph-transposed' directly.
451        // However, unless they rename files, this will fail to load properties, because
452        // properties' file names wouldn't match the base path.
453        // As 'is_transposed' is only useful when checking node types (of an arc),
454        // this function is unlikely to be called in that scenario, so this should be fine.
455        false
456    }
457
458    fn num_nodes(&self) -> usize {
459        self.graph.num_nodes()
460    }
461
462    fn num_arcs(&self) -> u64 {
463        UnderlyingGraph::num_arcs(&self.graph)
464    }
465
466    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
467        self.graph.has_arc(src_node_id, dst_node_id)
468    }
469}
470
471impl<P, G: UnderlyingGraph> SwhForwardGraph for SwhUnidirectionalGraph<P, G> {
472    type Successors<'succ>
473        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
474    where
475        Self: 'succ;
476
477    /// Return an [`IntoIterator`] over the successors of a node.
478    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
479        self.graph.unlabeled_successors(node_id)
480    }
481
482    /// Return the number of successors of a node.
483    fn outdegree(&self, node_id: NodeId) -> usize {
484        self.graph.outdegree(node_id)
485    }
486}
487
488impl<P, G: UnderlyingGraph> SwhLabeledForwardGraph for SwhUnidirectionalGraph<P, G>
489where
490    <G as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
491    for<'succ> <G as RandomAccessLabeling>::Labels<'succ>:
492        Iterator<Item = (usize, <<G as SequentialLabeling>::Label as Pair>::Right)>,
493{
494    type LabeledArcs<'arc> = LabeledArcIterator<<<<<G as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
495    type LabeledSuccessors<'succ>
496        = LabeledSuccessorIterator<<G as RandomAccessLabeling>::Labels<'succ>>
497    where
498        Self: 'succ;
499
500    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
501        LabeledSuccessorIterator {
502            successors: self.graph.labels(node_id),
503        }
504    }
505}
506
507impl<
508        M: properties::MaybeMaps,
509        T: properties::MaybeTimestamps,
510        P: properties::MaybePersons,
511        C: properties::MaybeContents,
512        S: properties::MaybeStrings,
513        N: properties::MaybeLabelNames,
514        G: UnderlyingGraph,
515    > SwhUnidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, G>
516{
517    /// Enriches the graph with more properties mmapped from disk
518    ///
519    /// # Example
520    ///
521    /// ```no_run
522    /// # use std::path::PathBuf;
523    /// use swh_graph::java_compat::mph::gov::GOVMPH;
524    ///
525    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
526    ///     .expect("Could not load graph")
527    ///     .init_properties()
528    ///     .load_properties(|properties| properties.load_maps::<GOVMPH>())
529    ///     .expect("Could not load SWHID maps")
530    ///     .load_properties(|properties| properties.load_timestamps())
531    ///     .expect("Could not load timestamps");
532    /// ```
533    pub fn load_properties<
534        M2: properties::MaybeMaps,
535        T2: properties::MaybeTimestamps,
536        P2: properties::MaybePersons,
537        C2: properties::MaybeContents,
538        S2: properties::MaybeStrings,
539        N2: properties::MaybeLabelNames,
540    >(
541        self,
542        loader: impl FnOnce(
543            properties::SwhGraphProperties<M, T, P, C, S, N>,
544        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
545    ) -> Result<SwhUnidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, G>>
546    {
547        Ok(SwhUnidirectionalGraph {
548            properties: loader(self.properties)?,
549            basepath: self.basepath,
550            graph: self.graph,
551        })
552    }
553}
554
555impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
556    /// Prerequisite for `load_properties`
557    pub fn init_properties(
558        self,
559    ) -> SwhUnidirectionalGraph<
560        properties::SwhGraphProperties<
561            properties::NoMaps,
562            properties::NoTimestamps,
563            properties::NoPersons,
564            properties::NoContents,
565            properties::NoStrings,
566            properties::NoLabelNames,
567        >,
568        G,
569    > {
570        SwhUnidirectionalGraph {
571            properties: properties::SwhGraphProperties::new(&self.basepath, self.graph.num_nodes()),
572            basepath: self.basepath,
573            graph: self.graph,
574        }
575    }
576
577    /// Enriches the graph with all properties, mmapped from disk
578    ///
579    /// # Example
580    ///
581    /// ```no_run
582    /// # use std::path::PathBuf;
583    /// use swh_graph::java_compat::mph::gov::GOVMPH;
584    ///
585    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
586    ///     .expect("Could not load graph")
587    ///     .load_all_properties::<GOVMPH>()
588    ///     .expect("Could not load properties");
589    /// ```
590    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
591        self,
592    ) -> Result<
593        SwhUnidirectionalGraph<
594            properties::SwhGraphProperties<
595                properties::MappedMaps<MPHF>,
596                properties::MappedTimestamps,
597                properties::MappedPersons,
598                properties::MappedContents,
599                properties::MappedStrings,
600                properties::MappedLabelNames,
601            >,
602            G,
603        >,
604    > {
605        self.init_properties()
606            .load_properties(|properties| properties.load_all())
607    }
608}
609
610impl<
611        MAPS: properties::MaybeMaps,
612        TIMESTAMPS: properties::MaybeTimestamps,
613        PERSONS: properties::MaybePersons,
614        CONTENTS: properties::MaybeContents,
615        STRINGS: properties::MaybeStrings,
616        LABELNAMES: properties::MaybeLabelNames,
617        G: UnderlyingGraph,
618    > SwhGraphWithProperties
619    for SwhUnidirectionalGraph<
620        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
621        G,
622    >
623{
624    type Maps = MAPS;
625    type Timestamps = TIMESTAMPS;
626    type Persons = PERSONS;
627    type Contents = CONTENTS;
628    type Strings = STRINGS;
629    type LabelNames = LABELNAMES;
630
631    fn properties(
632        &self,
633    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
634    {
635        &self.properties
636    }
637}
638
639impl<P, G: RandomAccessGraph + UnderlyingGraph> SwhUnidirectionalGraph<P, G> {
640    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
641    pub fn load_labels(self) -> Result<SwhUnidirectionalGraph<P, Zip<G, SwhLabeling>>> {
642        Ok(SwhUnidirectionalGraph {
643            // Note: keep british version of "labelled" here for compatibility with Java swh-graph
644            graph: zip_labels(self.graph, suffix_path(&self.basepath, "-labelled"))
645                .context("Could not load forward labels")?,
646            properties: self.properties,
647            basepath: self.basepath,
648        })
649    }
650}
651
652/// Class representing the compressed Software Heritage graph in both directions.
653///
654/// Type parameters:
655///
656/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
657///   [`load_properties`](SwhBidirectionalGraph::load_properties) and
658///   [`load_all_properties`](SwhBidirectionalGraph::load_all_properties)
659/// * FG is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
660///   after using [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels)
661/// * BG is the backward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
662///   after using [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
663pub struct SwhBidirectionalGraph<
664    P,
665    FG: UnderlyingGraph = DefaultUnderlyingGraph,
666    BG: UnderlyingGraph = DefaultUnderlyingGraph,
667> {
668    basepath: PathBuf,
669    forward_graph: FG,
670    backward_graph: BG,
671    properties: P,
672}
673
674impl SwhBidirectionalGraph<()> {
675    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
676        let basepath = basepath.as_ref().to_owned();
677        let forward_graph = DefaultUnderlyingGraph::new(&basepath)?;
678        let backward_graph = DefaultUnderlyingGraph::new(suffix_path(&basepath, "-transposed"))?;
679        Ok(Self::from_underlying_graphs(
680            basepath,
681            forward_graph,
682            backward_graph,
683        ))
684    }
685}
686
687impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
688    pub fn from_underlying_graphs(
689        basepath: PathBuf,
690        forward_graph: FG,
691        backward_graph: BG,
692    ) -> Self {
693        SwhBidirectionalGraph {
694            basepath,
695            backward_graph,
696            forward_graph,
697            properties: (),
698        }
699    }
700}
701
702impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhGraph for SwhBidirectionalGraph<P, FG, BG> {
703    fn path(&self) -> &Path {
704        self.basepath.as_path()
705    }
706
707    fn is_transposed(&self) -> bool {
708        // Technically, users can load the 'graph-transposed' directly.
709        // However, unless they rename files, this will fail to load properties, because
710        // properties' file names wouldn't match the base path.
711        // As 'is_transposed' is only useful when checking node types (of an arc),
712        // this function is unlikely to be called in that scenario, so this should be fine.
713        false
714    }
715
716    fn num_nodes(&self) -> usize {
717        self.forward_graph.num_nodes()
718    }
719
720    fn num_arcs(&self) -> u64 {
721        UnderlyingGraph::num_arcs(&self.forward_graph)
722    }
723
724    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
725        self.forward_graph.has_arc(src_node_id, dst_node_id)
726    }
727}
728
729impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhForwardGraph
730    for SwhBidirectionalGraph<P, FG, BG>
731{
732    type Successors<'succ>
733        = <FG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
734    where
735        Self: 'succ;
736
737    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
738        self.forward_graph.unlabeled_successors(node_id)
739    }
740    fn outdegree(&self, node_id: NodeId) -> usize {
741        self.forward_graph.outdegree(node_id)
742    }
743}
744
745impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledForwardGraph
746    for SwhBidirectionalGraph<P, FG, BG>
747where
748    <FG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
749    for<'succ> <FG as RandomAccessLabeling>::Labels<'succ>:
750        Iterator<Item = (usize, <<FG as SequentialLabeling>::Label as Pair>::Right)>,
751{
752    type LabeledArcs<'arc> = LabeledArcIterator<<<<<FG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
753    type LabeledSuccessors<'succ>
754        = LabeledSuccessorIterator<<FG as RandomAccessLabeling>::Labels<'succ>>
755    where
756        Self: 'succ;
757
758    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
759        LabeledSuccessorIterator {
760            successors: self.forward_graph.labels(node_id),
761        }
762    }
763}
764
765impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBackwardGraph
766    for SwhBidirectionalGraph<P, FG, BG>
767{
768    type Predecessors<'succ>
769        = <BG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
770    where
771        Self: 'succ;
772
773    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_> {
774        self.backward_graph.unlabeled_successors(node_id)
775    }
776
777    fn indegree(&self, node_id: NodeId) -> usize {
778        self.backward_graph.outdegree(node_id)
779    }
780}
781
782impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledBackwardGraph
783    for SwhBidirectionalGraph<P, FG, BG>
784where
785    <BG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
786    for<'succ> <BG as RandomAccessLabeling>::Labels<'succ>:
787        Iterator<Item = (usize, <<BG as SequentialLabeling>::Label as Pair>::Right)>,
788{
789    type LabeledArcs<'arc> = LabeledArcIterator<<<<<BG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
790    type LabeledPredecessors<'succ>
791        = LabeledSuccessorIterator<<BG as RandomAccessLabeling>::Labels<'succ>>
792    where
793        Self: 'succ;
794
795    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_> {
796        LabeledSuccessorIterator {
797            successors: self.backward_graph.labels(node_id),
798        }
799    }
800}
801
802impl<
803        M: properties::MaybeMaps,
804        T: properties::MaybeTimestamps,
805        P: properties::MaybePersons,
806        C: properties::MaybeContents,
807        S: properties::MaybeStrings,
808        N: properties::MaybeLabelNames,
809        BG: UnderlyingGraph,
810        FG: UnderlyingGraph,
811    > SwhBidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, FG, BG>
812{
813    /// Enriches the graph with more properties mmapped from disk
814    ///
815    /// # Example
816    ///
817    /// ```no_run
818    /// # use std::path::PathBuf;
819    /// use swh_graph::java_compat::mph::gov::GOVMPH;
820    /// use swh_graph::SwhGraphProperties;
821    ///
822    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
823    ///     .expect("Could not load graph")
824    ///     .init_properties()
825    ///     .load_properties(SwhGraphProperties::load_maps::<GOVMPH>)
826    ///     .expect("Could not load SWHID maps")
827    ///     .load_properties(SwhGraphProperties::load_timestamps)
828    ///     .expect("Could not load timestamps");
829    /// ```
830    pub fn load_properties<
831        M2: properties::MaybeMaps,
832        T2: properties::MaybeTimestamps,
833        P2: properties::MaybePersons,
834        C2: properties::MaybeContents,
835        S2: properties::MaybeStrings,
836        N2: properties::MaybeLabelNames,
837    >(
838        self,
839        loader: impl FnOnce(
840            properties::SwhGraphProperties<M, T, P, C, S, N>,
841        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
842    ) -> Result<SwhBidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, FG, BG>>
843    {
844        Ok(SwhBidirectionalGraph {
845            properties: loader(self.properties)?,
846            basepath: self.basepath,
847            forward_graph: self.forward_graph,
848            backward_graph: self.backward_graph,
849        })
850    }
851}
852
853impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
854    /// Prerequisite for `load_properties`
855    pub fn init_properties(
856        self,
857    ) -> SwhBidirectionalGraph<
858        properties::SwhGraphProperties<
859            properties::NoMaps,
860            properties::NoTimestamps,
861            properties::NoPersons,
862            properties::NoContents,
863            properties::NoStrings,
864            properties::NoLabelNames,
865        >,
866        FG,
867        BG,
868    > {
869        SwhBidirectionalGraph {
870            properties: properties::SwhGraphProperties::new(
871                &self.basepath,
872                self.forward_graph.num_nodes(),
873            ),
874            basepath: self.basepath,
875            forward_graph: self.forward_graph,
876            backward_graph: self.backward_graph,
877        }
878    }
879
880    /// Enriches the graph with more properties mmapped from disk
881    ///
882    /// # Example
883    ///
884    /// ```no_run
885    /// # use std::path::PathBuf;
886    /// use swh_graph::java_compat::mph::gov::GOVMPH;
887    ///
888    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
889    ///     .expect("Could not load graph")
890    ///     .load_all_properties::<GOVMPH>()
891    ///     .expect("Could not load properties");
892    /// ```
893    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
894        self,
895    ) -> Result<
896        SwhBidirectionalGraph<
897            properties::SwhGraphProperties<
898                properties::MappedMaps<MPHF>,
899                properties::MappedTimestamps,
900                properties::MappedPersons,
901                properties::MappedContents,
902                properties::MappedStrings,
903                properties::MappedLabelNames,
904            >,
905            FG,
906            BG,
907        >,
908    > {
909        self.init_properties()
910            .load_properties(|properties| properties.load_all())
911    }
912}
913
914impl<
915        MAPS: properties::MaybeMaps,
916        TIMESTAMPS: properties::MaybeTimestamps,
917        PERSONS: properties::MaybePersons,
918        CONTENTS: properties::MaybeContents,
919        STRINGS: properties::MaybeStrings,
920        LABELNAMES: properties::MaybeLabelNames,
921        BG: UnderlyingGraph,
922        FG: UnderlyingGraph,
923    > SwhGraphWithProperties
924    for SwhBidirectionalGraph<
925        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
926        FG,
927        BG,
928    >
929{
930    type Maps = MAPS;
931    type Timestamps = TIMESTAMPS;
932    type Persons = PERSONS;
933    type Contents = CONTENTS;
934    type Strings = STRINGS;
935    type LabelNames = LABELNAMES;
936
937    fn properties(
938        &self,
939    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
940    {
941        &self.properties
942    }
943}
944
945impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: UnderlyingGraph>
946    SwhBidirectionalGraph<P, FG, BG>
947{
948    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
949    pub fn load_forward_labels(self) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, BG>> {
950        Ok(SwhBidirectionalGraph {
951            forward_graph: zip_labels(self.forward_graph, suffix_path(&self.basepath, "-labelled"))
952                .context("Could not load forward labels")?,
953            backward_graph: self.backward_graph,
954            properties: self.properties,
955            basepath: self.basepath,
956        })
957    }
958}
959
960impl<P, FG: UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
961    SwhBidirectionalGraph<P, FG, BG>
962{
963    /// Consumes this graph and returns a new one that implements [`SwhLabeledBackwardGraph`]
964    pub fn load_backward_labels(
965        self,
966    ) -> Result<SwhBidirectionalGraph<P, FG, Zip<BG, SwhLabeling>>> {
967        Ok(SwhBidirectionalGraph {
968            forward_graph: self.forward_graph,
969            backward_graph: zip_labels(
970                self.backward_graph,
971                // Note: keep british version of "labelled" here for compatibility with Java swh-graph
972                suffix_path(&self.basepath, "-transposed-labelled"),
973            )
974            .context("Could not load backward labels")?,
975            properties: self.properties,
976            basepath: self.basepath,
977        })
978    }
979}
980
981impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
982    SwhBidirectionalGraph<P, FG, BG>
983{
984    /// Equivalent to calling both
985    /// [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels) and
986    /// [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
987    pub fn load_labels(
988        self,
989    ) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, Zip<BG, SwhLabeling>>> {
990        self.load_forward_labels()
991            .context("Could not load forward labels")?
992            .load_backward_labels()
993            .context("Could not load backward labels")
994    }
995}
996
997#[deprecated(
998    since = "5.2.0",
999    note = "please use `SwhUnidirectionalGraph::new` instead"
1000)]
1001/// Deprecated alias for [`SwhUnidirectionalGraph::new`]
1002pub fn load_unidirectional(basepath: impl AsRef<Path>) -> Result<SwhUnidirectionalGraph<()>> {
1003    SwhUnidirectionalGraph::new(basepath)
1004}
1005
1006#[deprecated(
1007    since = "5.2.0",
1008    note = "please use `SwhBidirectionalGraph::new` instead"
1009)]
1010/// Deprecated alias for [`SwhBidirectionalGraph::new`]
1011pub fn load_bidirectional(basepath: impl AsRef<Path>) -> Result<SwhBidirectionalGraph<()>> {
1012    SwhBidirectionalGraph::new(basepath)
1013}
1014
1015/// Loads a bidirectional graph, then all its properties, then its labels.
1016///
1017/// This is a shorthand for:
1018///
1019/// ```no_run
1020/// # use std::path::PathBuf;
1021/// # use anyhow::{Context, Result};
1022/// # use swh_graph::graph::SwhBidirectionalGraph;
1023/// # use swh_graph::mph::LoadableSwhidMphf;
1024/// #
1025/// # fn f<MPHF: LoadableSwhidMphf>() -> Result<()> {
1026/// # let basepath = PathBuf::from("./graph");
1027/// let graph = SwhBidirectionalGraph::new(basepath)
1028///     .context("Could not load graph")?
1029///     .load_all_properties::<MPHF>()
1030///     .context("Could not load properties")?
1031///     .load_labels()
1032///     .context("Could not load labels")?;
1033/// # Ok(())
1034/// # }
1035/// ```
1036///
1037/// When working on code made to be re-distributed, you should load only what is needed
1038/// (ie. load unidirectional if you don't need the backward graph, don't load properties
1039/// that are not needed, don't load labels unless they need to be read), so users can
1040/// run your code without a complete graph locally.
1041pub fn load_full<MPHF: LoadableSwhidMphf>(
1042    basepath: impl AsRef<Path>,
1043) -> Result<
1044    SwhBidirectionalGraph<
1045        properties::SwhGraphProperties<
1046            properties::MappedMaps<MPHF>,
1047            properties::MappedTimestamps,
1048            properties::MappedPersons,
1049            properties::MappedContents,
1050            properties::MappedStrings,
1051            properties::MappedLabelNames,
1052        >,
1053        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1054        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1055    >,
1056> {
1057    SwhBidirectionalGraph::new(basepath)
1058        .context("Could not load graph")?
1059        .load_all_properties()
1060        .context("Could not load properties")?
1061        .load_labels()
1062        .context("Could not load labels")
1063}
1064
1065fn zip_labels<G: RandomAccessGraph + UnderlyingGraph, P: AsRef<Path>>(
1066    graph: G,
1067    base_path: P,
1068) -> Result<Zip<G, SwhLabeling>> {
1069    let properties_path = suffix_path(&base_path, ".properties");
1070    let f = std::fs::File::open(&properties_path)
1071        .with_context(|| format!("Could not open {}", properties_path.display()))?;
1072    let map = java_properties::read(std::io::BufReader::new(f))?;
1073
1074    let graphclass = map
1075        .get("graphclass")
1076        .with_context(|| format!("Missing 'graphclass' from {}", properties_path.display()))?;
1077    // Note: keep british version of "labelled" here for compatibility with Java swh-graph
1078    if graphclass != "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph" {
1079        bail!(
1080            "Expected graphclass in {} to be \"it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph\", got {:?}",
1081            properties_path.display(),
1082            graphclass
1083        );
1084    }
1085
1086    let labelspec = map
1087        .get("labelspec")
1088        .with_context(|| format!("Missing 'labelspec' from {}", properties_path.display()))?;
1089    let width = labelspec
1090        .strip_prefix("org.softwareheritage.graph.labels.SwhLabel(DirEntry,")
1091        .and_then(|labelspec| labelspec.strip_suffix(')'))
1092        .and_then(|labelspec| labelspec.parse::<usize>().ok());
1093    let width = match width {
1094        None =>
1095        bail!("Expected labelspec in {} to be \"org.softwareheritage.graph.labels.SwhLabel(DirEntry,<integer>)\" (where <integer> is a small integer, usually under 30), got {:?}", properties_path.display(), labelspec),
1096        Some(width) => width
1097    };
1098
1099    let labels = crate::labeling::mmap(&base_path, crate::labeling::SwhDeserializer::new(width))
1100        .with_context(|| {
1101            format!(
1102                "Could not load labeling from {}",
1103                base_path.as_ref().display()
1104            )
1105        })?;
1106
1107    Ok(Zip(graph, labels))
1108}