swh_graph/
graph.rs

1// Copyright (C) 2023-2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Structures to manipulate the Software Heritage graph
7//!
8//! In order to load only what is necessary, these structures are initially created
9//! by calling [`SwhUnidirectionalGraph::new`] or [`SwhBidirectionalGraph::new`], then calling methods
10//! on them to progressively load additional data (`load_properties`, `load_all_properties`,
11//! `load_labels`)
12
13#![allow(clippy::type_complexity)]
14
15use std::borrow::Borrow;
16use std::collections::HashMap;
17use std::iter::Iterator;
18use std::path::{Path, PathBuf};
19
20use anyhow::{anyhow, bail, Context, Result};
21use dsi_progress_logger::{ConcurrentProgressLog, ProgressLog};
22use rayon::prelude::*;
23use webgraph::graphs::vec_graph::LabeledVecGraph;
24use webgraph::prelude::*;
25
26use crate::arc_iterators::{
27    DelabelingIterator, LabelTypingSuccessorIterator, LabeledArcIterator, LabeledSuccessorIterator,
28};
29// Useful trait to import as part of `use swh_graph::graph::*;`
30pub use crate::arc_iterators::IntoFlattenedLabeledArcsIterator;
31use crate::labeling::SwhLabeling;
32use crate::labels::{EdgeLabel, UntypedEdgeLabel};
33use crate::mph::LoadableSwhidMphf;
34use crate::properties;
35pub use crate::underlying_graph::DefaultUnderlyingGraph;
36use crate::utils::shuffle::par_iter_shuffled_range;
37use crate::utils::suffix_path;
38use crate::NodeType;
39
40/// Alias for [`usize`], which may become a newtype in a future version.
41pub type NodeId = usize;
42
43/// Supertrait of [`RandomAccessLabeling`] with methods to act like a [`RandomAccessGraph`].
44///
45/// If `Self` implements [`RandomAccessGraph`], then this is implemented as no-ops.
46/// Otherwise, it "unpeels" layers of zipping until it reaches the graph at the bottom:
47///
48/// - If `Self` is `Zip<L, R>`, this defers to `L` (aka. `Left<Zip<L, R>`).
49/// - If `Self` is `VecGraph` or `LabeledVecGraph<_>, this does the equivalent of deferring to `VecGraph`
50///   (through [`DelabelingIterator`] because it cannot create a new `VecGraph` without copying)
51pub trait UnderlyingGraph: RandomAccessLabeling {
52    type UnlabeledSuccessors<'succ>: IntoIterator<Item = NodeId>
53    where
54        Self: 'succ;
55
56    /// Workaround for some implementations of `<Self as RandomAccessLabeling>::num_arcs`
57    /// being missing
58    ///
59    /// `Zip::num_arcs` runs `assert_eq!(self.0.num_arcs(), self.1.num_arcs());`
60    /// but `BitStreamLabeling::num_arcs` always panics as of
61    /// f460742fe0f776df2248a5f09a3425b81eb70b07, so we can't use that.
62    fn num_arcs(&self) -> u64;
63    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
64    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_>;
65}
66
67impl<F: RandomAccessDecoderFactory> UnderlyingGraph for BvGraph<F> {
68    type UnlabeledSuccessors<'succ>
69        = <Self as RandomAccessLabeling>::Labels<'succ>
70    where
71        Self: 'succ;
72
73    fn num_arcs(&self) -> u64 {
74        <Self as RandomAccessLabeling>::num_arcs(self)
75    }
76    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
77        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
78    }
79    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
80        <Self as RandomAccessGraph>::successors(self, node_id)
81    }
82}
83
84impl<G: UnderlyingGraph, L: RandomAccessLabeling> UnderlyingGraph for Zip<G, L> {
85    type UnlabeledSuccessors<'succ>
86        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
87    where
88        Self: 'succ;
89
90    fn num_arcs(&self) -> u64 {
91        <G as UnderlyingGraph>::num_arcs(&self.0)
92    }
93    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
94        self.0.has_arc(src_node_id, dst_node_id)
95    }
96    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
97        self.0.unlabeled_successors(node_id)
98    }
99}
100
101impl UnderlyingGraph for VecGraph {
102    type UnlabeledSuccessors<'succ>
103        = <Self as RandomAccessLabeling>::Labels<'succ>
104    where
105        Self: 'succ;
106
107    fn num_arcs(&self) -> u64 {
108        <Self as RandomAccessLabeling>::num_arcs(self)
109    }
110    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
111        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
112    }
113    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
114        <Self as RandomAccessGraph>::successors(self, node_id)
115    }
116}
117
118impl<L: Clone> UnderlyingGraph for LabeledVecGraph<L> {
119    type UnlabeledSuccessors<'succ>
120        = DelabelingIterator<<Self as RandomAccessLabeling>::Labels<'succ>>
121    where
122        Self: 'succ;
123
124    fn num_arcs(&self) -> u64 {
125        <Self as RandomAccessLabeling>::num_arcs(self)
126    }
127    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
128        for succ in self.unlabeled_successors(src_node_id) {
129            if succ == dst_node_id {
130                return true;
131            }
132        }
133        false
134    }
135    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
136        DelabelingIterator {
137            successors: <Self as RandomAccessLabeling>::labels(self, node_id),
138        }
139    }
140}
141
142pub trait SwhGraph {
143    /// Return the base path of the graph
144    fn path(&self) -> &Path;
145
146    /// Returns whether the graph is in the `ori->snp->rel,rev->dir->cnt` direction
147    /// (with a few `dir->rev` arcs)
148    fn is_transposed(&self) -> bool;
149
150    /// Return the largest node id in the graph plus one.
151    ///
152    /// For graphs directly loaded from disk this is the actual number of nodes,
153    /// but it is an overestimate for some graphs (eg. [`Subgraph`](crate::views::Subgraph)).
154    ///
155    /// Use [`actual_num_nodes`](Self::actual_num_nodes) if you need the exact number of
156    /// nodes in the graph.
157    fn num_nodes(&self) -> usize;
158
159    /// Returns whether the given node id exists in the graph
160    ///
161    /// This is usually true iff `node_id < self.num_nodes()`, but may be false
162    /// when using a filtered view such as [`Subgraph`](crate::views::Subgraph).
163    fn has_node(&self, node_id: NodeId) -> bool {
164        node_id < self.num_nodes()
165    }
166
167    /// Return the number of arcs in the graph.
168    fn num_arcs(&self) -> u64;
169
170    /// Returns the number of nodes of each type, if known.
171    fn num_nodes_by_type(&self) -> Result<HashMap<NodeType, usize>> {
172        let path = self.path().with_extension("nodes.stats.txt");
173        std::fs::read_to_string(&path)
174            .with_context(|| format!("Could not read {}", path.display()))?
175            .lines()
176            .map(|line| {
177                let (type_, count) = line
178                    .split_once(' ')
179                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
180                let type_: NodeType = type_
181                    .parse()
182                    .map_err(|_| anyhow!("Unknown node type in {}: {}", path.display(), type_))?;
183                let count = count
184                    .parse()
185                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
186                Ok((type_, count))
187            })
188            .collect()
189    }
190    /// Returns the number of nodes in the graph, if known.
191    fn actual_num_nodes(&self) -> Result<usize> {
192        Ok(self.num_nodes_by_type()?.values().sum())
193    }
194    /// Returns the number of arcs of each type, if known.
195    fn num_arcs_by_type(&self) -> Result<HashMap<(NodeType, NodeType), usize>> {
196        let path = self.path().with_extension("edges.stats.txt");
197        std::fs::read_to_string(&path)
198            .with_context(|| format!("Could not read {}", path.display()))?
199            .lines()
200            .map(|line| {
201                let (type_, count) = line
202                    .split_once(' ')
203                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
204                let (src_type, dst_type) = type_
205                    .split_once(':')
206                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
207                let src_type: NodeType = src_type.parse().map_err(|_| {
208                    anyhow!("Unknown node type in {}: {}", path.display(), src_type)
209                })?;
210                let dst_type: NodeType = dst_type.parse().map_err(|_| {
211                    anyhow!("Unknown node type in {}: {}", path.display(), dst_type)
212                })?;
213                let count = count
214                    .parse()
215                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
216                Ok(((src_type, dst_type), count))
217            })
218            .collect()
219    }
220
221    /// Return whether there is an arc going from `src_node_id` to `dst_node_id`.
222    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
223
224    /// Returns an iterator on all the nodes
225    ///
226    /// Order is not guaranteed.
227    ///
228    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
229    /// even those that are filtered out.
230    fn iter_nodes<'a>(
231        &'a self,
232        mut pl: impl ProgressLog + 'a,
233    ) -> impl Iterator<Item = NodeId> + 'a {
234        (0..self.num_nodes())
235            .inspect(move |_| pl.light_update())
236            .filter(|&node_id| self.has_node(node_id))
237    }
238    /// Returns a parallel iterator on all the nodes
239    ///
240    /// Order is not guaranteed.
241    ///
242    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
243    /// even those that are filtered out.
244    fn par_iter_nodes<'a>(
245        &'a self,
246        pl: impl ConcurrentProgressLog + 'a,
247    ) -> impl ParallelIterator<Item = NodeId> + 'a
248    where
249        Self: Sync,
250    {
251        par_iter_shuffled_range(0..self.num_nodes())
252            .map_with(pl, move |pl, node_id| {
253                pl.update();
254                node_id
255            })
256            .filter(|&node_id| self.has_node(node_id))
257    }
258}
259
260pub trait SwhForwardGraph: SwhGraph {
261    type Successors<'succ>: IntoIterator<Item = usize>
262    where
263        Self: 'succ;
264
265    /// Return an [`IntoIterator`] over the successors of a node.
266    fn successors(&self, node_id: NodeId) -> Self::Successors<'_>;
267    /// Return the number of successors of a node.
268    fn outdegree(&self, node_id: NodeId) -> usize;
269}
270
271pub trait SwhLabeledForwardGraph: SwhForwardGraph {
272    type LabeledArcs<'arc>: IntoIterator<Item = UntypedEdgeLabel>
273    where
274        Self: 'arc;
275    type LabeledSuccessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
276        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
277    where
278        Self: 'node;
279
280    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
281    /// of each arc
282    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_>;
283
284    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
285    /// of each arc
286    fn labeled_successors(
287        &self,
288        node_id: NodeId,
289    ) -> impl Iterator<Item = (usize, impl Iterator<Item = EdgeLabel>)>
290           + IntoFlattenedLabeledArcsIterator<EdgeLabel>
291    where
292        Self: SwhGraphWithProperties + Sized,
293        <Self as SwhGraphWithProperties>::Maps: crate::properties::Maps,
294    {
295        LabelTypingSuccessorIterator {
296            graph: self,
297            is_transposed: false,
298            src: node_id,
299            successors: self.untyped_labeled_successors(node_id).into_iter(),
300        }
301    }
302}
303
304pub trait SwhBackwardGraph: SwhGraph {
305    type Predecessors<'succ>: IntoIterator<Item = usize>
306    where
307        Self: 'succ;
308
309    /// Return an [`IntoIterator`] over the predecessors of a node.
310    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_>;
311    /// Return the number of predecessors of a node.
312    fn indegree(&self, node_id: NodeId) -> usize;
313}
314
315pub trait SwhLabeledBackwardGraph: SwhBackwardGraph {
316    type LabeledArcs<'arc>: IntoIterator<Item = UntypedEdgeLabel>
317    where
318        Self: 'arc;
319    type LabeledPredecessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
320        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
321    where
322        Self: 'node;
323
324    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
325    /// of each arc
326    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_>;
327
328    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
329    /// of each arc
330    fn labeled_predecessors(
331        &self,
332        node_id: NodeId,
333    ) -> impl Iterator<Item = (usize, impl Iterator<Item = crate::labels::EdgeLabel>)>
334    where
335        Self: SwhGraphWithProperties + Sized,
336        <Self as SwhGraphWithProperties>::Maps: crate::properties::Maps,
337    {
338        LabelTypingSuccessorIterator {
339            graph: self,
340            is_transposed: true,
341            src: node_id,
342            successors: self.untyped_labeled_predecessors(node_id).into_iter(),
343        }
344    }
345}
346
347pub trait SwhGraphWithProperties: SwhGraph {
348    type Maps: properties::MaybeMaps;
349    type Timestamps: properties::MaybeTimestamps;
350    type Persons: properties::MaybePersons;
351    type Contents: properties::MaybeContents;
352    type Strings: properties::MaybeStrings;
353    type LabelNames: properties::MaybeLabelNames;
354
355    fn properties(
356        &self,
357    ) -> &properties::SwhGraphProperties<
358        Self::Maps,
359        Self::Timestamps,
360        Self::Persons,
361        Self::Contents,
362        Self::Strings,
363        Self::LabelNames,
364    >;
365}
366
367/// Alias for structures representing a graph with all arcs, arc labels, and node properties
368/// loaded.
369pub trait SwhFullGraph:
370    SwhLabeledForwardGraph
371    + SwhLabeledBackwardGraph
372    + SwhGraphWithProperties<
373        Maps: crate::properties::Maps,
374        Timestamps: crate::properties::Timestamps,
375        Persons: crate::properties::Persons,
376        Contents: crate::properties::Contents,
377        Strings: crate::properties::Strings,
378        LabelNames: crate::properties::LabelNames,
379    >
380{
381}
382
383impl<
384        G: SwhLabeledForwardGraph
385            + SwhLabeledBackwardGraph
386            + SwhGraphWithProperties<
387                Maps: crate::properties::Maps,
388                Timestamps: crate::properties::Timestamps,
389                Persons: crate::properties::Persons,
390                Contents: crate::properties::Contents,
391                Strings: crate::properties::Strings,
392                LabelNames: crate::properties::LabelNames,
393            >,
394    > SwhFullGraph for G
395{
396}
397
398/// Class representing the compressed Software Heritage graph in a single direction.
399///
400/// Type parameters:
401///
402/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
403///   [`load_properties`](SwhUnidirectionalGraph::load_properties) and
404///   [`load_all_properties`](SwhUnidirectionalGraph::load_all_properties)
405/// * G is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
406///   [`load_labels`](SwhUnidirectionalGraph::load_labels)
407pub struct SwhUnidirectionalGraph<P, G: UnderlyingGraph = DefaultUnderlyingGraph> {
408    basepath: PathBuf,
409    graph: G,
410    properties: P,
411}
412
413impl SwhUnidirectionalGraph<()> {
414    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
415        let basepath = basepath.as_ref().to_owned();
416        let graph = DefaultUnderlyingGraph::new(&basepath)?;
417        Ok(Self::from_underlying_graph(basepath, graph))
418    }
419}
420
421impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
422    pub fn from_underlying_graph(basepath: PathBuf, graph: G) -> Self {
423        SwhUnidirectionalGraph {
424            basepath,
425            graph,
426            properties: (),
427        }
428    }
429}
430
431impl<P, G: UnderlyingGraph> SwhGraph for SwhUnidirectionalGraph<P, G> {
432    fn path(&self) -> &Path {
433        self.basepath.as_path()
434    }
435
436    fn is_transposed(&self) -> bool {
437        // Technically, users can load the 'graph-transposed' directly.
438        // However, unless they rename files, this will fail to load properties, because
439        // properties' file names wouldn't match the base path.
440        // As 'is_transposed' is only useful when checking node types (of an arc),
441        // this function is unlikely to be called in that scenario, so this should be fine.
442        false
443    }
444
445    fn num_nodes(&self) -> usize {
446        self.graph.num_nodes()
447    }
448
449    fn num_arcs(&self) -> u64 {
450        UnderlyingGraph::num_arcs(&self.graph)
451    }
452
453    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
454        self.graph.has_arc(src_node_id, dst_node_id)
455    }
456}
457
458impl<P, G: UnderlyingGraph> SwhForwardGraph for SwhUnidirectionalGraph<P, G> {
459    type Successors<'succ>
460        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
461    where
462        Self: 'succ;
463
464    /// Return an [`IntoIterator`] over the successors of a node.
465    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
466        self.graph.unlabeled_successors(node_id)
467    }
468
469    /// Return the number of successors of a node.
470    fn outdegree(&self, node_id: NodeId) -> usize {
471        self.graph.outdegree(node_id)
472    }
473}
474
475impl<P, G: UnderlyingGraph> SwhLabeledForwardGraph for SwhUnidirectionalGraph<P, G>
476where
477    <G as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
478    for<'succ> <G as RandomAccessLabeling>::Labels<'succ>:
479        Iterator<Item = (usize, <<G as SequentialLabeling>::Label as Pair>::Right)>,
480{
481    type LabeledArcs<'arc> = LabeledArcIterator<<<<<G as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
482    type LabeledSuccessors<'succ>
483        = LabeledSuccessorIterator<<G as RandomAccessLabeling>::Labels<'succ>>
484    where
485        Self: 'succ;
486
487    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
488        LabeledSuccessorIterator {
489            successors: self.graph.labels(node_id),
490        }
491    }
492}
493
494impl<
495        M: properties::MaybeMaps,
496        T: properties::MaybeTimestamps,
497        P: properties::MaybePersons,
498        C: properties::MaybeContents,
499        S: properties::MaybeStrings,
500        N: properties::MaybeLabelNames,
501        G: UnderlyingGraph,
502    > SwhUnidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, G>
503{
504    /// Enriches the graph with more properties mmapped from disk
505    ///
506    /// # Example
507    ///
508    /// ```no_run
509    /// # use std::path::PathBuf;
510    /// use swh_graph::java_compat::mph::gov::GOVMPH;
511    ///
512    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
513    ///     .expect("Could not load graph")
514    ///     .init_properties()
515    ///     .load_properties(|properties| properties.load_maps::<GOVMPH>())
516    ///     .expect("Could not load SWHID maps")
517    ///     .load_properties(|properties| properties.load_timestamps())
518    ///     .expect("Could not load timestamps");
519    /// ```
520    pub fn load_properties<
521        M2: properties::MaybeMaps,
522        T2: properties::MaybeTimestamps,
523        P2: properties::MaybePersons,
524        C2: properties::MaybeContents,
525        S2: properties::MaybeStrings,
526        N2: properties::MaybeLabelNames,
527    >(
528        self,
529        loader: impl FnOnce(
530            properties::SwhGraphProperties<M, T, P, C, S, N>,
531        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
532    ) -> Result<SwhUnidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, G>>
533    {
534        Ok(SwhUnidirectionalGraph {
535            properties: loader(self.properties)?,
536            basepath: self.basepath,
537            graph: self.graph,
538        })
539    }
540}
541
542impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
543    /// Prerequisite for `load_properties`
544    pub fn init_properties(
545        self,
546    ) -> SwhUnidirectionalGraph<
547        properties::SwhGraphProperties<
548            properties::NoMaps,
549            properties::NoTimestamps,
550            properties::NoPersons,
551            properties::NoContents,
552            properties::NoStrings,
553            properties::NoLabelNames,
554        >,
555        G,
556    > {
557        SwhUnidirectionalGraph {
558            properties: properties::SwhGraphProperties::new(&self.basepath, self.graph.num_nodes()),
559            basepath: self.basepath,
560            graph: self.graph,
561        }
562    }
563
564    /// Enriches the graph with all properties, mmapped from disk
565    ///
566    /// # Example
567    ///
568    /// ```no_run
569    /// # use std::path::PathBuf;
570    /// use swh_graph::java_compat::mph::gov::GOVMPH;
571    ///
572    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
573    ///     .expect("Could not load graph")
574    ///     .load_all_properties::<GOVMPH>()
575    ///     .expect("Could not load properties");
576    /// ```
577    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
578        self,
579    ) -> Result<
580        SwhUnidirectionalGraph<
581            properties::SwhGraphProperties<
582                properties::MappedMaps<MPHF>,
583                properties::MappedTimestamps,
584                properties::MappedPersons,
585                properties::MappedContents,
586                properties::MappedStrings,
587                properties::MappedLabelNames,
588            >,
589            G,
590        >,
591    > {
592        self.init_properties()
593            .load_properties(|properties| properties.load_all())
594    }
595}
596
597impl<
598        MAPS: properties::MaybeMaps,
599        TIMESTAMPS: properties::MaybeTimestamps,
600        PERSONS: properties::MaybePersons,
601        CONTENTS: properties::MaybeContents,
602        STRINGS: properties::MaybeStrings,
603        LABELNAMES: properties::MaybeLabelNames,
604        G: UnderlyingGraph,
605    > SwhGraphWithProperties
606    for SwhUnidirectionalGraph<
607        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
608        G,
609    >
610{
611    type Maps = MAPS;
612    type Timestamps = TIMESTAMPS;
613    type Persons = PERSONS;
614    type Contents = CONTENTS;
615    type Strings = STRINGS;
616    type LabelNames = LABELNAMES;
617
618    fn properties(
619        &self,
620    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
621    {
622        &self.properties
623    }
624}
625
626impl<P, G: RandomAccessGraph + UnderlyingGraph> SwhUnidirectionalGraph<P, G> {
627    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
628    pub fn load_labels(self) -> Result<SwhUnidirectionalGraph<P, Zip<G, SwhLabeling>>> {
629        Ok(SwhUnidirectionalGraph {
630            // Note: keep british version of "labelled" here for compatibility with Java swh-graph
631            graph: zip_labels(self.graph, suffix_path(&self.basepath, "-labelled"))
632                .context("Could not load forward labels")?,
633            properties: self.properties,
634            basepath: self.basepath,
635        })
636    }
637}
638
639/// Class representing the compressed Software Heritage graph in both directions.
640///
641/// Type parameters:
642///
643/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
644///   [`load_properties`](SwhBidirectionalGraph::load_properties) and
645///   [`load_all_properties`](SwhBidirectionalGraph::load_all_properties)
646/// * FG is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
647///   after using [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels)
648/// * BG is the backward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
649///   after using [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
650pub struct SwhBidirectionalGraph<
651    P,
652    FG: UnderlyingGraph = DefaultUnderlyingGraph,
653    BG: UnderlyingGraph = DefaultUnderlyingGraph,
654> {
655    basepath: PathBuf,
656    forward_graph: FG,
657    backward_graph: BG,
658    properties: P,
659}
660
661impl SwhBidirectionalGraph<()> {
662    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
663        let basepath = basepath.as_ref().to_owned();
664        let forward_graph = DefaultUnderlyingGraph::new(&basepath)?;
665        let backward_graph = DefaultUnderlyingGraph::new(suffix_path(&basepath, "-transposed"))?;
666        Ok(Self::from_underlying_graphs(
667            basepath,
668            forward_graph,
669            backward_graph,
670        ))
671    }
672}
673
674impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
675    pub fn from_underlying_graphs(
676        basepath: PathBuf,
677        forward_graph: FG,
678        backward_graph: BG,
679    ) -> Self {
680        SwhBidirectionalGraph {
681            basepath,
682            backward_graph,
683            forward_graph,
684            properties: (),
685        }
686    }
687}
688
689impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhGraph for SwhBidirectionalGraph<P, FG, BG> {
690    fn path(&self) -> &Path {
691        self.basepath.as_path()
692    }
693
694    fn is_transposed(&self) -> bool {
695        // Technically, users can load the 'graph-transposed' directly.
696        // However, unless they rename files, this will fail to load properties, because
697        // properties' file names wouldn't match the base path.
698        // As 'is_transposed' is only useful when checking node types (of an arc),
699        // this function is unlikely to be called in that scenario, so this should be fine.
700        false
701    }
702
703    fn num_nodes(&self) -> usize {
704        self.forward_graph.num_nodes()
705    }
706
707    fn num_arcs(&self) -> u64 {
708        UnderlyingGraph::num_arcs(&self.forward_graph)
709    }
710
711    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
712        self.forward_graph.has_arc(src_node_id, dst_node_id)
713    }
714}
715
716impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhForwardGraph
717    for SwhBidirectionalGraph<P, FG, BG>
718{
719    type Successors<'succ>
720        = <FG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
721    where
722        Self: 'succ;
723
724    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
725        self.forward_graph.unlabeled_successors(node_id)
726    }
727    fn outdegree(&self, node_id: NodeId) -> usize {
728        self.forward_graph.outdegree(node_id)
729    }
730}
731
732impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledForwardGraph
733    for SwhBidirectionalGraph<P, FG, BG>
734where
735    <FG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
736    for<'succ> <FG as RandomAccessLabeling>::Labels<'succ>:
737        Iterator<Item = (usize, <<FG as SequentialLabeling>::Label as Pair>::Right)>,
738{
739    type LabeledArcs<'arc> = LabeledArcIterator<<<<<FG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
740    type LabeledSuccessors<'succ>
741        = LabeledSuccessorIterator<<FG as RandomAccessLabeling>::Labels<'succ>>
742    where
743        Self: 'succ;
744
745    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
746        LabeledSuccessorIterator {
747            successors: self.forward_graph.labels(node_id),
748        }
749    }
750}
751
752impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBackwardGraph
753    for SwhBidirectionalGraph<P, FG, BG>
754{
755    type Predecessors<'succ>
756        = <BG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
757    where
758        Self: 'succ;
759
760    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_> {
761        self.backward_graph.unlabeled_successors(node_id)
762    }
763
764    fn indegree(&self, node_id: NodeId) -> usize {
765        self.backward_graph.outdegree(node_id)
766    }
767}
768
769impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledBackwardGraph
770    for SwhBidirectionalGraph<P, FG, BG>
771where
772    <BG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
773    for<'succ> <BG as RandomAccessLabeling>::Labels<'succ>:
774        Iterator<Item = (usize, <<BG as SequentialLabeling>::Label as Pair>::Right)>,
775{
776    type LabeledArcs<'arc> = LabeledArcIterator<<<<<BG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
777    type LabeledPredecessors<'succ>
778        = LabeledSuccessorIterator<<BG as RandomAccessLabeling>::Labels<'succ>>
779    where
780        Self: 'succ;
781
782    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_> {
783        LabeledSuccessorIterator {
784            successors: self.backward_graph.labels(node_id),
785        }
786    }
787}
788
789impl<
790        M: properties::MaybeMaps,
791        T: properties::MaybeTimestamps,
792        P: properties::MaybePersons,
793        C: properties::MaybeContents,
794        S: properties::MaybeStrings,
795        N: properties::MaybeLabelNames,
796        BG: UnderlyingGraph,
797        FG: UnderlyingGraph,
798    > SwhBidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, FG, BG>
799{
800    /// Enriches the graph with more properties mmapped from disk
801    ///
802    /// # Example
803    ///
804    /// ```no_run
805    /// # use std::path::PathBuf;
806    /// use swh_graph::java_compat::mph::gov::GOVMPH;
807    /// use swh_graph::SwhGraphProperties;
808    ///
809    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
810    ///     .expect("Could not load graph")
811    ///     .init_properties()
812    ///     .load_properties(SwhGraphProperties::load_maps::<GOVMPH>)
813    ///     .expect("Could not load SWHID maps")
814    ///     .load_properties(SwhGraphProperties::load_timestamps)
815    ///     .expect("Could not load timestamps");
816    /// ```
817    pub fn load_properties<
818        M2: properties::MaybeMaps,
819        T2: properties::MaybeTimestamps,
820        P2: properties::MaybePersons,
821        C2: properties::MaybeContents,
822        S2: properties::MaybeStrings,
823        N2: properties::MaybeLabelNames,
824    >(
825        self,
826        loader: impl FnOnce(
827            properties::SwhGraphProperties<M, T, P, C, S, N>,
828        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
829    ) -> Result<SwhBidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, FG, BG>>
830    {
831        Ok(SwhBidirectionalGraph {
832            properties: loader(self.properties)?,
833            basepath: self.basepath,
834            forward_graph: self.forward_graph,
835            backward_graph: self.backward_graph,
836        })
837    }
838}
839
840impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
841    /// Prerequisite for `load_properties`
842    pub fn init_properties(
843        self,
844    ) -> SwhBidirectionalGraph<
845        properties::SwhGraphProperties<
846            properties::NoMaps,
847            properties::NoTimestamps,
848            properties::NoPersons,
849            properties::NoContents,
850            properties::NoStrings,
851            properties::NoLabelNames,
852        >,
853        FG,
854        BG,
855    > {
856        SwhBidirectionalGraph {
857            properties: properties::SwhGraphProperties::new(
858                &self.basepath,
859                self.forward_graph.num_nodes(),
860            ),
861            basepath: self.basepath,
862            forward_graph: self.forward_graph,
863            backward_graph: self.backward_graph,
864        }
865    }
866
867    /// Enriches the graph with more properties mmapped from disk
868    ///
869    /// # Example
870    ///
871    /// ```no_run
872    /// # use std::path::PathBuf;
873    /// use swh_graph::java_compat::mph::gov::GOVMPH;
874    ///
875    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
876    ///     .expect("Could not load graph")
877    ///     .load_all_properties::<GOVMPH>()
878    ///     .expect("Could not load properties");
879    /// ```
880    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
881        self,
882    ) -> Result<
883        SwhBidirectionalGraph<
884            properties::SwhGraphProperties<
885                properties::MappedMaps<MPHF>,
886                properties::MappedTimestamps,
887                properties::MappedPersons,
888                properties::MappedContents,
889                properties::MappedStrings,
890                properties::MappedLabelNames,
891            >,
892            FG,
893            BG,
894        >,
895    > {
896        self.init_properties()
897            .load_properties(|properties| properties.load_all())
898    }
899}
900
901impl<
902        MAPS: properties::MaybeMaps,
903        TIMESTAMPS: properties::MaybeTimestamps,
904        PERSONS: properties::MaybePersons,
905        CONTENTS: properties::MaybeContents,
906        STRINGS: properties::MaybeStrings,
907        LABELNAMES: properties::MaybeLabelNames,
908        BG: UnderlyingGraph,
909        FG: UnderlyingGraph,
910    > SwhGraphWithProperties
911    for SwhBidirectionalGraph<
912        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
913        FG,
914        BG,
915    >
916{
917    type Maps = MAPS;
918    type Timestamps = TIMESTAMPS;
919    type Persons = PERSONS;
920    type Contents = CONTENTS;
921    type Strings = STRINGS;
922    type LabelNames = LABELNAMES;
923
924    fn properties(
925        &self,
926    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
927    {
928        &self.properties
929    }
930}
931
932impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: UnderlyingGraph>
933    SwhBidirectionalGraph<P, FG, BG>
934{
935    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
936    pub fn load_forward_labels(self) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, BG>> {
937        Ok(SwhBidirectionalGraph {
938            forward_graph: zip_labels(self.forward_graph, suffix_path(&self.basepath, "-labelled"))
939                .context("Could not load forward labels")?,
940            backward_graph: self.backward_graph,
941            properties: self.properties,
942            basepath: self.basepath,
943        })
944    }
945}
946
947impl<P, FG: UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
948    SwhBidirectionalGraph<P, FG, BG>
949{
950    /// Consumes this graph and returns a new one that implements [`SwhLabeledBackwardGraph`]
951    pub fn load_backward_labels(
952        self,
953    ) -> Result<SwhBidirectionalGraph<P, FG, Zip<BG, SwhLabeling>>> {
954        Ok(SwhBidirectionalGraph {
955            forward_graph: self.forward_graph,
956            backward_graph: zip_labels(
957                self.backward_graph,
958                // Note: keep british version of "labelled" here for compatibility with Java swh-graph
959                suffix_path(&self.basepath, "-transposed-labelled"),
960            )
961            .context("Could not load backward labels")?,
962            properties: self.properties,
963            basepath: self.basepath,
964        })
965    }
966}
967
968impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
969    SwhBidirectionalGraph<P, FG, BG>
970{
971    /// Equivalent to calling both
972    /// [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels) and
973    /// [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
974    pub fn load_labels(
975        self,
976    ) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, Zip<BG, SwhLabeling>>> {
977        self.load_forward_labels()
978            .context("Could not load forward labels")?
979            .load_backward_labels()
980            .context("Could not load backward labels")
981    }
982}
983
984#[deprecated(
985    since = "5.2.0",
986    note = "please use `SwhUnidirectionalGraph::new` instead"
987)]
988/// Deprecated alias for [`SwhUnidirectionalGraph::new`]
989pub fn load_unidirectional(basepath: impl AsRef<Path>) -> Result<SwhUnidirectionalGraph<()>> {
990    SwhUnidirectionalGraph::new(basepath)
991}
992
993#[deprecated(
994    since = "5.2.0",
995    note = "please use `SwhBidirectionalGraph::new` instead"
996)]
997/// Deprecated alias for [`SwhBidirectionalGraph::new`]
998pub fn load_bidirectional(basepath: impl AsRef<Path>) -> Result<SwhBidirectionalGraph<()>> {
999    SwhBidirectionalGraph::new(basepath)
1000}
1001
1002/// Loads a bidirectional graph, then all its properties, then its labels.
1003///
1004/// This is a shorthand for:
1005///
1006/// ```no_run
1007/// # use std::path::PathBuf;
1008/// # use anyhow::{Context, Result};
1009/// # use swh_graph::graph::SwhBidirectionalGraph;
1010/// # use swh_graph::mph::LoadableSwhidMphf;
1011/// #
1012/// # fn f<MPHF: LoadableSwhidMphf>() -> Result<()> {
1013/// # let basepath = PathBuf::from("./graph");
1014/// let graph = SwhBidirectionalGraph::new(basepath)
1015///     .context("Could not load graph")?
1016///     .load_all_properties::<MPHF>()
1017///     .context("Could not load properties")?
1018///     .load_labels()
1019///     .context("Could not load labels")?;
1020/// # Ok(())
1021/// # }
1022/// ```
1023///
1024/// When working on code made to be re-distributed, you should load only what is needed
1025/// (ie. load unidirectional if you don't need the backward graph, don't load properties
1026/// that are not needed, don't load labels unless they need to be read), so users can
1027/// run your code without a complete graph locally.
1028pub fn load_full<MPHF: LoadableSwhidMphf>(
1029    basepath: impl AsRef<Path>,
1030) -> Result<
1031    SwhBidirectionalGraph<
1032        properties::SwhGraphProperties<
1033            properties::MappedMaps<MPHF>,
1034            properties::MappedTimestamps,
1035            properties::MappedPersons,
1036            properties::MappedContents,
1037            properties::MappedStrings,
1038            properties::MappedLabelNames,
1039        >,
1040        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1041        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1042    >,
1043> {
1044    SwhBidirectionalGraph::new(basepath)
1045        .context("Could not load graph")?
1046        .load_all_properties()
1047        .context("Could not load properties")?
1048        .load_labels()
1049        .context("Could not load labels")
1050}
1051
1052fn zip_labels<G: RandomAccessGraph + UnderlyingGraph, P: AsRef<Path>>(
1053    graph: G,
1054    base_path: P,
1055) -> Result<Zip<G, SwhLabeling>> {
1056    let properties_path = suffix_path(&base_path, ".properties");
1057    let f = std::fs::File::open(&properties_path)
1058        .with_context(|| format!("Could not open {}", properties_path.display()))?;
1059    let map = java_properties::read(std::io::BufReader::new(f))?;
1060
1061    let graphclass = map
1062        .get("graphclass")
1063        .with_context(|| format!("Missing 'graphclass' from {}", properties_path.display()))?;
1064    // Note: keep british version of "labelled" here for compatibility with Java swh-graph
1065    if graphclass != "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph" {
1066        bail!(
1067            "Expected graphclass in {} to be \"it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph\", got {:?}",
1068            properties_path.display(),
1069            graphclass
1070        );
1071    }
1072
1073    let labelspec = map
1074        .get("labelspec")
1075        .with_context(|| format!("Missing 'labelspec' from {}", properties_path.display()))?;
1076    let width = labelspec
1077        .strip_prefix("org.softwareheritage.graph.labels.SwhLabel(DirEntry,")
1078        .and_then(|labelspec| labelspec.strip_suffix(')'))
1079        .and_then(|labelspec| labelspec.parse::<usize>().ok());
1080    let width = match width {
1081        None =>
1082        bail!("Expected labelspec in {} to be \"org.softwareheritage.graph.labels.SwhLabel(DirEntry,<integer>)\" (where <integer> is a small integer, usually under 30), got {:?}", properties_path.display(), labelspec),
1083        Some(width) => width
1084    };
1085
1086    let labels = crate::labeling::mmap(&base_path, crate::labeling::SwhDeserializer::new(width))
1087        .with_context(|| {
1088            format!(
1089                "Could not load labeling from {}",
1090                base_path.as_ref().display()
1091            )
1092        })?;
1093
1094    debug_assert!(webgraph::prelude::Zip(&graph, &labels).verify());
1095    Ok(Zip(graph, labels))
1096}