Skip to main content

swh_graph/
graph.rs

1// Copyright (C) 2023-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Structures to manipulate the Software Heritage graph
7//!
8//! In order to load only what is necessary, these structures are initially created
9//! by calling [`SwhUnidirectionalGraph::new`] or [`SwhBidirectionalGraph::new`], then calling methods
10//! on them to progressively load additional data (`load_properties`, `load_all_properties`,
11//! `load_labels`)
12
13#![allow(clippy::type_complexity)]
14
15use std::borrow::Borrow;
16use std::collections::HashMap;
17use std::iter::Iterator;
18use std::path::{Path, PathBuf};
19
20use anyhow::{anyhow, bail, Context, Result};
21use dsi_progress_logger::{ConcurrentProgressLog, ProgressLog};
22use rayon::prelude::*;
23use webgraph::graphs::vec_graph::LabeledVecGraph;
24use webgraph::prelude::*;
25
26use crate::arc_iterators::{
27    DelabelingIterator, LabelTypingSuccessorIterator, LabeledArcIterator, LabeledSuccessorIterator,
28};
29// Useful trait to import as part of `use swh_graph::graph::*;`
30pub use crate::arc_iterators::IntoFlattenedLabeledArcsIterator;
31use crate::labeling::SwhLabeling;
32use crate::labels::{EdgeLabel, UntypedEdgeLabel};
33use crate::mph::LoadableSwhidMphf;
34use crate::properties;
35pub use crate::underlying_graph::DefaultUnderlyingGraph;
36use crate::utils::shuffle::par_iter_shuffled_range;
37use crate::utils::suffix_path;
38use crate::NodeType;
39
40/// Alias for [`usize`], which may become a newtype in a future version.
41pub type NodeId = usize;
42
43/// Supertrait of [`RandomAccessLabeling`] with methods to act like a [`RandomAccessGraph`].
44///
45/// If `Self` implements [`RandomAccessGraph`], then this is implemented as no-ops.
46/// Otherwise, it "unpeels" layers of zipping until it reaches the graph at the bottom:
47///
48/// - If `Self` is `Zip<L, R>`, this defers to `L` (aka. `Left<Zip<L, R>`).
49/// - If `Self` is `VecGraph` or `LabeledVecGraph<_>, this does the equivalent of deferring to `VecGraph`
50///   (through [`DelabelingIterator`] because it cannot create a new `VecGraph` without copying)
51pub trait UnderlyingGraph: RandomAccessLabeling {
52    type UnlabeledSuccessors<'succ>: IntoIterator<Item = NodeId>
53    where
54        Self: 'succ;
55
56    /// Workaround for some implementations of `<Self as RandomAccessLabeling>::num_arcs`
57    /// being missing
58    ///
59    /// `Zip::num_arcs` runs `assert_eq!(self.0.num_arcs(), self.1.num_arcs());`
60    /// but `BitStreamLabeling::num_arcs` always panics as of
61    /// f460742fe0f776df2248a5f09a3425b81eb70b07, so we can't use that.
62    fn num_arcs(&self) -> u64;
63    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
64    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_>;
65}
66
67impl<F: RandomAccessDecoderFactory> UnderlyingGraph for BvGraph<F> {
68    type UnlabeledSuccessors<'succ>
69        = <Self as RandomAccessLabeling>::Labels<'succ>
70    where
71        Self: 'succ;
72
73    #[inline(always)]
74    fn num_arcs(&self) -> u64 {
75        <Self as RandomAccessLabeling>::num_arcs(self)
76    }
77    #[inline(always)]
78    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
79        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
80    }
81    #[inline(always)]
82    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
83        <Self as RandomAccessGraph>::successors(self, node_id)
84    }
85}
86
87impl<G: UnderlyingGraph, L: RandomAccessLabeling> UnderlyingGraph for Zip<G, L> {
88    type UnlabeledSuccessors<'succ>
89        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
90    where
91        Self: 'succ;
92
93    #[inline(always)]
94    fn num_arcs(&self) -> u64 {
95        <G as UnderlyingGraph>::num_arcs(&self.0)
96    }
97    #[inline(always)]
98    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
99        self.0.has_arc(src_node_id, dst_node_id)
100    }
101    #[inline(always)]
102    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
103        self.0.unlabeled_successors(node_id)
104    }
105}
106
107impl UnderlyingGraph for VecGraph {
108    type UnlabeledSuccessors<'succ>
109        = <Self as RandomAccessLabeling>::Labels<'succ>
110    where
111        Self: 'succ;
112
113    #[inline(always)]
114    fn num_arcs(&self) -> u64 {
115        <Self as RandomAccessLabeling>::num_arcs(self)
116    }
117    #[inline(always)]
118    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
119        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
120    }
121    #[inline(always)]
122    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
123        <Self as RandomAccessGraph>::successors(self, node_id)
124    }
125}
126
127impl<L: Clone> UnderlyingGraph for LabeledVecGraph<L> {
128    type UnlabeledSuccessors<'succ>
129        = DelabelingIterator<<Self as RandomAccessLabeling>::Labels<'succ>>
130    where
131        Self: 'succ;
132
133    #[inline(always)]
134    fn num_arcs(&self) -> u64 {
135        <Self as RandomAccessLabeling>::num_arcs(self)
136    }
137    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
138        for succ in self.unlabeled_successors(src_node_id) {
139            if succ == dst_node_id {
140                return true;
141            }
142        }
143        false
144    }
145    #[inline(always)]
146    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
147        DelabelingIterator {
148            successors: <Self as RandomAccessLabeling>::labels(self, node_id),
149        }
150    }
151}
152
153pub trait SwhGraph {
154    /// Return the base path of the graph
155    fn path(&self) -> &Path;
156
157    /// Returns whether the graph is in the `ori->snp->rel,rev->dir->cnt` direction
158    /// (with a few `dir->rev` arcs)
159    fn is_transposed(&self) -> bool;
160
161    /// Return the largest node id in the graph plus one.
162    ///
163    /// For graphs directly loaded from disk this is the actual number of nodes,
164    /// but it is an overestimate for some graphs (eg. [`Subgraph`](crate::views::Subgraph)).
165    ///
166    /// Use [`actual_num_nodes`](Self::actual_num_nodes) if you need the exact number of
167    /// nodes in the graph.
168    fn num_nodes(&self) -> usize;
169
170    /// Returns whether the given node id exists in the graph
171    ///
172    /// This is usually true iff `node_id < self.num_nodes()`, but may be false
173    /// when using a filtered view such as [`Subgraph`](crate::views::Subgraph).
174    fn has_node(&self, node_id: NodeId) -> bool {
175        node_id < self.num_nodes()
176    }
177
178    /// Return the number of arcs in the graph.
179    fn num_arcs(&self) -> u64;
180
181    /// Returns the number of nodes of each type, if known.
182    fn num_nodes_by_type(&self) -> Result<HashMap<NodeType, usize>> {
183        let path = self.path().with_extension("nodes.stats.txt");
184        std::fs::read_to_string(&path)
185            .with_context(|| format!("Could not read {}", path.display()))?
186            .lines()
187            .map(|line| {
188                let (type_, count) = line
189                    .split_once(' ')
190                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
191                let type_: NodeType = type_
192                    .parse()
193                    .map_err(|_| anyhow!("Unknown node type in {}: {}", path.display(), type_))?;
194                let count = count
195                    .parse()
196                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
197                Ok((type_, count))
198            })
199            .collect()
200    }
201    /// Returns the number of nodes in the graph, if known.
202    fn actual_num_nodes(&self) -> Result<usize> {
203        Ok(self.num_nodes_by_type()?.values().sum())
204    }
205    /// Returns the number of arcs of each type, if known.
206    fn num_arcs_by_type(&self) -> Result<HashMap<(NodeType, NodeType), usize>> {
207        let path = self.path().with_extension("edges.stats.txt");
208        std::fs::read_to_string(&path)
209            .with_context(|| format!("Could not read {}", path.display()))?
210            .lines()
211            .map(|line| {
212                let (type_, count) = line
213                    .split_once(' ')
214                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
215                let (src_type, dst_type) = type_
216                    .split_once(':')
217                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
218                let src_type: NodeType = src_type.parse().map_err(|_| {
219                    anyhow!("Unknown node type in {}: {}", path.display(), src_type)
220                })?;
221                let dst_type: NodeType = dst_type.parse().map_err(|_| {
222                    anyhow!("Unknown node type in {}: {}", path.display(), dst_type)
223                })?;
224                let count = count
225                    .parse()
226                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
227                Ok(((src_type, dst_type), count))
228            })
229            .collect()
230    }
231
232    /// Return whether there is an arc going from `src_node_id` to `dst_node_id`.
233    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
234
235    /// Returns an iterator on all the nodes
236    ///
237    /// Order is not guaranteed.
238    ///
239    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
240    /// even those that are filtered out.
241    fn iter_nodes<'a>(
242        &'a self,
243        mut pl: impl ProgressLog + 'a,
244    ) -> impl Iterator<Item = NodeId> + 'a {
245        (0..self.num_nodes())
246            .inspect(move |_| pl.light_update())
247            .filter(|&node_id| self.has_node(node_id))
248    }
249    /// Returns a parallel iterator on all the nodes
250    ///
251    /// Order is not guaranteed.
252    ///
253    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
254    /// even those that are filtered out.
255    fn par_iter_nodes<'a>(
256        &'a self,
257        pl: impl ConcurrentProgressLog + 'a,
258    ) -> impl ParallelIterator<Item = NodeId> + 'a
259    where
260        Self: Sync,
261    {
262        par_iter_shuffled_range(0..self.num_nodes())
263            .map_with(pl, move |pl, node_id| {
264                pl.update();
265                node_id
266            })
267            .filter(|&node_id| self.has_node(node_id))
268    }
269}
270
271pub trait SwhForwardGraph: SwhGraph {
272    type Successors<'succ>: IntoIterator<Item = usize>
273    where
274        Self: 'succ;
275
276    /// Return an [`IntoIterator`] over the successors of a node.
277    fn successors(&self, node_id: NodeId) -> Self::Successors<'_>;
278    /// Return the number of successors of a node.
279    fn outdegree(&self, node_id: NodeId) -> usize;
280}
281
282#[diagnostic::on_unimplemented(
283    label = "does not have forward labels loaded",
284    note = "Use `let graph = graph.load_labels()` to load them"
285)]
286pub trait SwhLabeledForwardGraph:
287    SwhForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>
288{
289    type LabeledArcs<'arc>: Iterator<Item = UntypedEdgeLabel>
290    where
291        Self: 'arc;
292    type LabeledSuccessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
293        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
294    where
295        Self: 'node;
296
297    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
298    /// of each arc
299    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_>;
300
301    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
302    /// of each arc
303    fn labeled_successors(
304        &self,
305        node_id: NodeId,
306    ) -> impl Iterator<Item = (usize, impl Iterator<Item = EdgeLabel>)>
307           + IntoFlattenedLabeledArcsIterator<EdgeLabel>
308           + '_ {
309        LabelTypingSuccessorIterator {
310            graph: self,
311            is_transposed: self.is_transposed(),
312            src: node_id,
313            successors: self.untyped_labeled_successors(node_id).into_iter(),
314        }
315    }
316}
317
318#[diagnostic::on_unimplemented(
319    label = "does not have backward arcs loaded",
320    note = "Use SwhBidirectionalGraph instead of SwhUnidirectionalGraph if you don't already"
321)]
322pub trait SwhBackwardGraph: SwhGraph {
323    type Predecessors<'succ>: IntoIterator<Item = usize>
324    where
325        Self: 'succ;
326
327    /// Return an [`IntoIterator`] over the predecessors of a node.
328    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_>;
329    /// Return the number of predecessors of a node.
330    fn indegree(&self, node_id: NodeId) -> usize;
331}
332
333#[diagnostic::on_unimplemented(
334    label = "does not have backward labels loaded",
335    note = "Use SwhBidirectionalGraph instead of SwhUnidirectionalGraph if you don't already",
336    note = "and `let graph = graph.load_labels()`"
337)]
338pub trait SwhLabeledBackwardGraph:
339    SwhBackwardGraph + SwhGraphWithProperties<Maps: properties::Maps>
340{
341    type LabeledArcs<'arc>: Iterator<Item = UntypedEdgeLabel>
342    where
343        Self: 'arc;
344    type LabeledPredecessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
345        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
346    where
347        Self: 'node;
348
349    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
350    /// of each arc
351    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_>;
352
353    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
354    /// of each arc
355    fn labeled_predecessors(
356        &self,
357        node_id: NodeId,
358    ) -> impl IntoIterator<Item = (usize, impl Iterator<Item = crate::labels::EdgeLabel>)>
359           + IntoFlattenedLabeledArcsIterator<EdgeLabel>
360           + '_ {
361        LabelTypingSuccessorIterator {
362            graph: self,
363            is_transposed: !self.is_transposed(),
364            src: node_id,
365            successors: self.untyped_labeled_predecessors(node_id).into_iter(),
366        }
367    }
368}
369
370pub trait SwhGraphWithProperties: SwhGraph {
371    type Maps: properties::MaybeMaps;
372    type Timestamps: properties::MaybeTimestamps;
373    type Persons: properties::MaybePersons;
374    type Contents: properties::MaybeContents;
375    type Strings: properties::MaybeStrings;
376    type LabelNames: properties::MaybeLabelNames;
377
378    fn properties(
379        &self,
380    ) -> &properties::SwhGraphProperties<
381        Self::Maps,
382        Self::Timestamps,
383        Self::Persons,
384        Self::Contents,
385        Self::Strings,
386        Self::LabelNames,
387    >;
388}
389
390/// Alias for structures representing a graph with all arcs, arc labels, and node properties
391/// loaded.
392pub trait SwhFullGraph:
393    SwhLabeledForwardGraph
394    + SwhLabeledBackwardGraph
395    + SwhGraphWithProperties<
396        Maps: properties::Maps,
397        Timestamps: properties::Timestamps,
398        Persons: properties::Persons,
399        Contents: properties::Contents,
400        Strings: properties::Strings,
401        LabelNames: properties::LabelNames,
402    >
403{
404}
405
406impl<
407        G: SwhLabeledForwardGraph
408            + SwhLabeledBackwardGraph
409            + SwhGraphWithProperties<
410                Maps: properties::Maps,
411                Timestamps: properties::Timestamps,
412                Persons: properties::Persons,
413                Contents: properties::Contents,
414                Strings: properties::Strings,
415                LabelNames: properties::LabelNames,
416            >,
417    > SwhFullGraph for G
418{
419}
420
421/// Class representing the compressed Software Heritage graph in a single direction.
422///
423/// Type parameters:
424///
425/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
426///   [`load_properties`](SwhUnidirectionalGraph::load_properties) and
427///   [`load_all_properties`](SwhUnidirectionalGraph::load_all_properties)
428/// * G is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
429///   [`load_labels`](SwhUnidirectionalGraph::load_labels)
430pub struct SwhUnidirectionalGraph<P, G: UnderlyingGraph = DefaultUnderlyingGraph> {
431    basepath: PathBuf,
432    graph: G,
433    properties: P,
434}
435
436impl SwhUnidirectionalGraph<()> {
437    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
438        let basepath = basepath.as_ref().to_owned();
439        let graph = DefaultUnderlyingGraph::new(&basepath)?;
440        Ok(Self::from_underlying_graph(basepath, graph))
441    }
442}
443
444impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
445    pub fn from_underlying_graph(basepath: PathBuf, graph: G) -> Self {
446        SwhUnidirectionalGraph {
447            basepath,
448            graph,
449            properties: (),
450        }
451    }
452}
453
454impl<P, G: UnderlyingGraph> SwhGraph for SwhUnidirectionalGraph<P, G> {
455    #[inline(always)]
456    fn path(&self) -> &Path {
457        self.basepath.as_path()
458    }
459
460    fn is_transposed(&self) -> bool {
461        // Technically, users can load the 'graph-transposed' directly.
462        // However, unless they rename files, this will fail to load properties, because
463        // properties' file names wouldn't match the base path.
464        // As 'is_transposed' is only useful when checking node types (of an arc),
465        // this function is unlikely to be called in that scenario, so this should be fine.
466        false
467    }
468
469    #[inline(always)]
470    fn num_nodes(&self) -> usize {
471        self.graph.num_nodes()
472    }
473
474    #[inline(always)]
475    fn num_arcs(&self) -> u64 {
476        UnderlyingGraph::num_arcs(&self.graph)
477    }
478
479    #[inline(always)]
480    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
481        self.graph.has_arc(src_node_id, dst_node_id)
482    }
483}
484
485impl<P, G: UnderlyingGraph> SwhForwardGraph for SwhUnidirectionalGraph<P, G> {
486    type Successors<'succ>
487        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
488    where
489        Self: 'succ;
490
491    /// Return an [`IntoIterator`] over the successors of a node.
492    #[inline(always)]
493    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
494        self.graph.unlabeled_successors(node_id)
495    }
496
497    /// Return the number of successors of a node.
498    #[inline(always)]
499    fn outdegree(&self, node_id: NodeId) -> usize {
500        self.graph.outdegree(node_id)
501    }
502}
503
504impl<P, G: UnderlyingGraph> SwhLabeledForwardGraph for SwhUnidirectionalGraph<P, G>
505where
506    <G as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
507    for<'succ> <G as RandomAccessLabeling>::Labels<'succ>:
508        Iterator<Item = (usize, <<G as SequentialLabeling>::Label as Pair>::Right)>,
509    Self: SwhGraphWithProperties<Maps: properties::Maps>,
510{
511    type LabeledArcs<'arc> = LabeledArcIterator<<<<<G as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
512    type LabeledSuccessors<'succ>
513        = LabeledSuccessorIterator<<G as RandomAccessLabeling>::Labels<'succ>>
514    where
515        Self: 'succ;
516
517    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
518        LabeledSuccessorIterator {
519            successors: self.graph.labels(node_id),
520        }
521    }
522}
523
524impl<
525        M: properties::MaybeMaps,
526        T: properties::MaybeTimestamps,
527        P: properties::MaybePersons,
528        C: properties::MaybeContents,
529        S: properties::MaybeStrings,
530        N: properties::MaybeLabelNames,
531        G: UnderlyingGraph,
532    > SwhUnidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, G>
533{
534    /// Enriches the graph with more properties mmapped from disk
535    ///
536    /// # Example
537    ///
538    /// ```no_run
539    /// # use std::path::PathBuf;
540    /// use swh_graph::java_compat::mph::gov::GOVMPH;
541    ///
542    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
543    ///     .expect("Could not load graph")
544    ///     .init_properties()
545    ///     .load_properties(|properties| properties.load_maps::<GOVMPH>())
546    ///     .expect("Could not load SWHID maps")
547    ///     .load_properties(|properties| properties.load_timestamps())
548    ///     .expect("Could not load timestamps");
549    /// ```
550    pub fn load_properties<
551        M2: properties::MaybeMaps,
552        T2: properties::MaybeTimestamps,
553        P2: properties::MaybePersons,
554        C2: properties::MaybeContents,
555        S2: properties::MaybeStrings,
556        N2: properties::MaybeLabelNames,
557    >(
558        self,
559        loader: impl FnOnce(
560            properties::SwhGraphProperties<M, T, P, C, S, N>,
561        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
562    ) -> Result<SwhUnidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, G>>
563    {
564        Ok(SwhUnidirectionalGraph {
565            properties: loader(self.properties)?,
566            basepath: self.basepath,
567            graph: self.graph,
568        })
569    }
570}
571
572impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
573    /// Prerequisite for `load_properties`
574    pub fn init_properties(
575        self,
576    ) -> SwhUnidirectionalGraph<
577        properties::SwhGraphProperties<
578            properties::NoMaps,
579            properties::NoTimestamps,
580            properties::NoPersons,
581            properties::NoContents,
582            properties::NoStrings,
583            properties::NoLabelNames,
584        >,
585        G,
586    > {
587        SwhUnidirectionalGraph {
588            properties: properties::SwhGraphProperties::new(&self.basepath, self.graph.num_nodes()),
589            basepath: self.basepath,
590            graph: self.graph,
591        }
592    }
593
594    /// Enriches the graph with all properties, mmapped from disk
595    ///
596    /// # Example
597    ///
598    /// ```no_run
599    /// # use std::path::PathBuf;
600    /// use swh_graph::java_compat::mph::gov::GOVMPH;
601    ///
602    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
603    ///     .expect("Could not load graph")
604    ///     .load_all_properties::<GOVMPH>()
605    ///     .expect("Could not load properties");
606    /// ```
607    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
608        self,
609    ) -> Result<
610        SwhUnidirectionalGraph<
611            properties::SwhGraphProperties<
612                properties::MappedMaps<MPHF>,
613                properties::MappedTimestamps,
614                properties::MappedPersons,
615                properties::MappedContents,
616                properties::MappedStrings,
617                properties::MappedLabelNames,
618            >,
619            G,
620        >,
621    > {
622        self.init_properties()
623            .load_properties(|properties| properties.load_all())
624    }
625}
626
627impl<
628        MAPS: properties::MaybeMaps,
629        TIMESTAMPS: properties::MaybeTimestamps,
630        PERSONS: properties::MaybePersons,
631        CONTENTS: properties::MaybeContents,
632        STRINGS: properties::MaybeStrings,
633        LABELNAMES: properties::MaybeLabelNames,
634        G: UnderlyingGraph,
635    > SwhGraphWithProperties
636    for SwhUnidirectionalGraph<
637        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
638        G,
639    >
640{
641    type Maps = MAPS;
642    type Timestamps = TIMESTAMPS;
643    type Persons = PERSONS;
644    type Contents = CONTENTS;
645    type Strings = STRINGS;
646    type LabelNames = LABELNAMES;
647
648    fn properties(
649        &self,
650    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
651    {
652        &self.properties
653    }
654}
655
656impl<P, G: RandomAccessGraph + UnderlyingGraph> SwhUnidirectionalGraph<P, G> {
657    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
658    pub fn load_labels(self) -> Result<SwhUnidirectionalGraph<P, Zip<G, SwhLabeling>>> {
659        Ok(SwhUnidirectionalGraph {
660            // Note: keep british version of "labelled" here for compatibility with Java swh-graph
661            graph: zip_labels(self.graph, suffix_path(&self.basepath, "-labelled"))
662                .context("Could not load forward labels")?,
663            properties: self.properties,
664            basepath: self.basepath,
665        })
666    }
667}
668
669/// Class representing the compressed Software Heritage graph in both directions.
670///
671/// Type parameters:
672///
673/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
674///   [`load_properties`](SwhBidirectionalGraph::load_properties) and
675///   [`load_all_properties`](SwhBidirectionalGraph::load_all_properties)
676/// * FG is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
677///   after using [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels)
678/// * BG is the backward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
679///   after using [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
680pub struct SwhBidirectionalGraph<
681    P,
682    FG: UnderlyingGraph = DefaultUnderlyingGraph,
683    BG: UnderlyingGraph = DefaultUnderlyingGraph,
684> {
685    basepath: PathBuf,
686    forward_graph: FG,
687    backward_graph: BG,
688    properties: P,
689}
690
691impl SwhBidirectionalGraph<()> {
692    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
693        let basepath = basepath.as_ref().to_owned();
694        let forward_graph = DefaultUnderlyingGraph::new(&basepath)?;
695        let backward_graph = DefaultUnderlyingGraph::new(suffix_path(&basepath, "-transposed"))?;
696        Ok(Self::from_underlying_graphs(
697            basepath,
698            forward_graph,
699            backward_graph,
700        ))
701    }
702}
703
704impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
705    pub fn from_underlying_graphs(
706        basepath: PathBuf,
707        forward_graph: FG,
708        backward_graph: BG,
709    ) -> Self {
710        SwhBidirectionalGraph {
711            basepath,
712            backward_graph,
713            forward_graph,
714            properties: (),
715        }
716    }
717}
718
719impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhGraph for SwhBidirectionalGraph<P, FG, BG> {
720    #[inline(always)]
721    fn path(&self) -> &Path {
722        self.basepath.as_path()
723    }
724
725    fn is_transposed(&self) -> bool {
726        // Technically, users can load the 'graph-transposed' directly.
727        // However, unless they rename files, this will fail to load properties, because
728        // properties' file names wouldn't match the base path.
729        // As 'is_transposed' is only useful when checking node types (of an arc),
730        // this function is unlikely to be called in that scenario, so this should be fine.
731        false
732    }
733
734    #[inline(always)]
735    fn num_nodes(&self) -> usize {
736        self.forward_graph.num_nodes()
737    }
738
739    #[inline(always)]
740    fn num_arcs(&self) -> u64 {
741        UnderlyingGraph::num_arcs(&self.forward_graph)
742    }
743
744    #[inline(always)]
745    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
746        self.forward_graph.has_arc(src_node_id, dst_node_id)
747    }
748}
749
750impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhForwardGraph
751    for SwhBidirectionalGraph<P, FG, BG>
752{
753    type Successors<'succ>
754        = <FG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
755    where
756        Self: 'succ;
757
758    #[inline(always)]
759    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
760        self.forward_graph.unlabeled_successors(node_id)
761    }
762    #[inline(always)]
763    fn outdegree(&self, node_id: NodeId) -> usize {
764        self.forward_graph.outdegree(node_id)
765    }
766}
767
768impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledForwardGraph
769    for SwhBidirectionalGraph<P, FG, BG>
770where
771    <FG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
772    for<'succ> <FG as RandomAccessLabeling>::Labels<'succ>:
773        Iterator<Item = (usize, <<FG as SequentialLabeling>::Label as Pair>::Right)>,
774    Self: SwhGraphWithProperties<Maps: properties::Maps>,
775{
776    type LabeledArcs<'arc> = LabeledArcIterator<<<<<FG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
777    type LabeledSuccessors<'succ>
778        = LabeledSuccessorIterator<<FG as RandomAccessLabeling>::Labels<'succ>>
779    where
780        Self: 'succ;
781
782    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
783        LabeledSuccessorIterator {
784            successors: self.forward_graph.labels(node_id),
785        }
786    }
787}
788
789impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBackwardGraph
790    for SwhBidirectionalGraph<P, FG, BG>
791{
792    type Predecessors<'succ>
793        = <BG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
794    where
795        Self: 'succ;
796
797    #[inline(always)]
798    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_> {
799        self.backward_graph.unlabeled_successors(node_id)
800    }
801
802    #[inline(always)]
803    fn indegree(&self, node_id: NodeId) -> usize {
804        self.backward_graph.outdegree(node_id)
805    }
806}
807
808impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledBackwardGraph
809    for SwhBidirectionalGraph<P, FG, BG>
810where
811    <BG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
812    for<'succ> <BG as RandomAccessLabeling>::Labels<'succ>:
813        Iterator<Item = (usize, <<BG as SequentialLabeling>::Label as Pair>::Right)>,
814    Self: SwhGraphWithProperties<Maps: properties::Maps>,
815{
816    type LabeledArcs<'arc> = LabeledArcIterator<<<<<BG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
817    type LabeledPredecessors<'succ>
818        = LabeledSuccessorIterator<<BG as RandomAccessLabeling>::Labels<'succ>>
819    where
820        Self: 'succ;
821
822    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_> {
823        LabeledSuccessorIterator {
824            successors: self.backward_graph.labels(node_id),
825        }
826    }
827}
828
829impl<
830        M: properties::MaybeMaps,
831        T: properties::MaybeTimestamps,
832        P: properties::MaybePersons,
833        C: properties::MaybeContents,
834        S: properties::MaybeStrings,
835        N: properties::MaybeLabelNames,
836        BG: UnderlyingGraph,
837        FG: UnderlyingGraph,
838    > SwhBidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, FG, BG>
839{
840    /// Enriches the graph with more properties mmapped from disk
841    ///
842    /// # Example
843    ///
844    /// ```no_run
845    /// # use std::path::PathBuf;
846    /// use swh_graph::java_compat::mph::gov::GOVMPH;
847    /// use swh_graph::SwhGraphProperties;
848    ///
849    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
850    ///     .expect("Could not load graph")
851    ///     .init_properties()
852    ///     .load_properties(SwhGraphProperties::load_maps::<GOVMPH>)
853    ///     .expect("Could not load SWHID maps")
854    ///     .load_properties(SwhGraphProperties::load_timestamps)
855    ///     .expect("Could not load timestamps");
856    /// ```
857    pub fn load_properties<
858        M2: properties::MaybeMaps,
859        T2: properties::MaybeTimestamps,
860        P2: properties::MaybePersons,
861        C2: properties::MaybeContents,
862        S2: properties::MaybeStrings,
863        N2: properties::MaybeLabelNames,
864    >(
865        self,
866        loader: impl FnOnce(
867            properties::SwhGraphProperties<M, T, P, C, S, N>,
868        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
869    ) -> Result<SwhBidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, FG, BG>>
870    {
871        Ok(SwhBidirectionalGraph {
872            properties: loader(self.properties)?,
873            basepath: self.basepath,
874            forward_graph: self.forward_graph,
875            backward_graph: self.backward_graph,
876        })
877    }
878}
879
880impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
881    /// Prerequisite for `load_properties`
882    pub fn init_properties(
883        self,
884    ) -> SwhBidirectionalGraph<
885        properties::SwhGraphProperties<
886            properties::NoMaps,
887            properties::NoTimestamps,
888            properties::NoPersons,
889            properties::NoContents,
890            properties::NoStrings,
891            properties::NoLabelNames,
892        >,
893        FG,
894        BG,
895    > {
896        SwhBidirectionalGraph {
897            properties: properties::SwhGraphProperties::new(
898                &self.basepath,
899                self.forward_graph.num_nodes(),
900            ),
901            basepath: self.basepath,
902            forward_graph: self.forward_graph,
903            backward_graph: self.backward_graph,
904        }
905    }
906
907    /// Enriches the graph with more properties mmapped from disk
908    ///
909    /// # Example
910    ///
911    /// ```no_run
912    /// # use std::path::PathBuf;
913    /// use swh_graph::java_compat::mph::gov::GOVMPH;
914    ///
915    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
916    ///     .expect("Could not load graph")
917    ///     .load_all_properties::<GOVMPH>()
918    ///     .expect("Could not load properties");
919    /// ```
920    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
921        self,
922    ) -> Result<
923        SwhBidirectionalGraph<
924            properties::SwhGraphProperties<
925                properties::MappedMaps<MPHF>,
926                properties::MappedTimestamps,
927                properties::MappedPersons,
928                properties::MappedContents,
929                properties::MappedStrings,
930                properties::MappedLabelNames,
931            >,
932            FG,
933            BG,
934        >,
935    > {
936        self.init_properties()
937            .load_properties(|properties| properties.load_all())
938    }
939}
940
941impl<
942        MAPS: properties::MaybeMaps,
943        TIMESTAMPS: properties::MaybeTimestamps,
944        PERSONS: properties::MaybePersons,
945        CONTENTS: properties::MaybeContents,
946        STRINGS: properties::MaybeStrings,
947        LABELNAMES: properties::MaybeLabelNames,
948        BG: UnderlyingGraph,
949        FG: UnderlyingGraph,
950    > SwhGraphWithProperties
951    for SwhBidirectionalGraph<
952        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
953        FG,
954        BG,
955    >
956{
957    type Maps = MAPS;
958    type Timestamps = TIMESTAMPS;
959    type Persons = PERSONS;
960    type Contents = CONTENTS;
961    type Strings = STRINGS;
962    type LabelNames = LABELNAMES;
963
964    #[inline(always)]
965    fn properties(
966        &self,
967    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
968    {
969        &self.properties
970    }
971}
972
973impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: UnderlyingGraph>
974    SwhBidirectionalGraph<P, FG, BG>
975{
976    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
977    pub fn load_forward_labels(self) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, BG>> {
978        Ok(SwhBidirectionalGraph {
979            forward_graph: zip_labels(self.forward_graph, suffix_path(&self.basepath, "-labelled"))
980                .context("Could not load forward labels")?,
981            backward_graph: self.backward_graph,
982            properties: self.properties,
983            basepath: self.basepath,
984        })
985    }
986}
987
988impl<P, FG: UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
989    SwhBidirectionalGraph<P, FG, BG>
990{
991    /// Consumes this graph and returns a new one that implements [`SwhLabeledBackwardGraph`]
992    pub fn load_backward_labels(
993        self,
994    ) -> Result<SwhBidirectionalGraph<P, FG, Zip<BG, SwhLabeling>>> {
995        Ok(SwhBidirectionalGraph {
996            forward_graph: self.forward_graph,
997            backward_graph: zip_labels(
998                self.backward_graph,
999                // Note: keep british version of "labelled" here for compatibility with Java swh-graph
1000                suffix_path(&self.basepath, "-transposed-labelled"),
1001            )
1002            .context("Could not load backward labels")?,
1003            properties: self.properties,
1004            basepath: self.basepath,
1005        })
1006    }
1007}
1008
1009impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
1010    SwhBidirectionalGraph<P, FG, BG>
1011{
1012    /// Equivalent to calling both
1013    /// [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels) and
1014    /// [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
1015    pub fn load_labels(
1016        self,
1017    ) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, Zip<BG, SwhLabeling>>> {
1018        self.load_forward_labels()
1019            .context("Could not load forward labels")?
1020            .load_backward_labels()
1021            .context("Could not load backward labels")
1022    }
1023}
1024
1025#[deprecated(
1026    since = "5.2.0",
1027    note = "please use `SwhUnidirectionalGraph::new` instead"
1028)]
1029/// Deprecated alias for [`SwhUnidirectionalGraph::new`]
1030pub fn load_unidirectional(basepath: impl AsRef<Path>) -> Result<SwhUnidirectionalGraph<()>> {
1031    SwhUnidirectionalGraph::new(basepath)
1032}
1033
1034#[deprecated(
1035    since = "5.2.0",
1036    note = "please use `SwhBidirectionalGraph::new` instead"
1037)]
1038/// Deprecated alias for [`SwhBidirectionalGraph::new`]
1039pub fn load_bidirectional(basepath: impl AsRef<Path>) -> Result<SwhBidirectionalGraph<()>> {
1040    SwhBidirectionalGraph::new(basepath)
1041}
1042
1043/// Loads a bidirectional graph, then all its properties, then its labels.
1044///
1045/// This is a shorthand for:
1046///
1047/// ```no_run
1048/// # use std::path::PathBuf;
1049/// # use anyhow::{Context, Result};
1050/// # use swh_graph::graph::SwhBidirectionalGraph;
1051/// # use swh_graph::mph::LoadableSwhidMphf;
1052/// #
1053/// # fn f<MPHF: LoadableSwhidMphf>() -> Result<()> {
1054/// # let basepath = PathBuf::from("./graph");
1055/// let graph = SwhBidirectionalGraph::new(basepath)
1056///     .context("Could not load graph")?
1057///     .load_all_properties::<MPHF>()
1058///     .context("Could not load properties")?
1059///     .load_labels()
1060///     .context("Could not load labels")?;
1061/// # Ok(())
1062/// # }
1063/// ```
1064///
1065/// When working on code made to be re-distributed, you should load only what is needed
1066/// (ie. load unidirectional if you don't need the backward graph, don't load properties
1067/// that are not needed, don't load labels unless they need to be read), so users can
1068/// run your code without a complete graph locally.
1069pub fn load_full<MPHF: LoadableSwhidMphf>(
1070    basepath: impl AsRef<Path>,
1071) -> Result<
1072    SwhBidirectionalGraph<
1073        properties::SwhGraphProperties<
1074            properties::MappedMaps<MPHF>,
1075            properties::MappedTimestamps,
1076            properties::MappedPersons,
1077            properties::MappedContents,
1078            properties::MappedStrings,
1079            properties::MappedLabelNames,
1080        >,
1081        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1082        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1083    >,
1084> {
1085    SwhBidirectionalGraph::new(basepath)
1086        .context("Could not load graph")?
1087        .load_all_properties()
1088        .context("Could not load properties")?
1089        .load_labels()
1090        .context("Could not load labels")
1091}
1092
1093fn zip_labels<G: RandomAccessGraph + UnderlyingGraph, P: AsRef<Path>>(
1094    graph: G,
1095    base_path: P,
1096) -> Result<Zip<G, SwhLabeling>> {
1097    let properties_path = suffix_path(&base_path, ".properties");
1098    let f = std::fs::File::open(&properties_path)
1099        .with_context(|| format!("Could not open {}", properties_path.display()))?;
1100    let map = java_properties::read(std::io::BufReader::new(f))?;
1101
1102    let graphclass = map
1103        .get("graphclass")
1104        .with_context(|| format!("Missing 'graphclass' from {}", properties_path.display()))?;
1105    // Note: keep british version of "labelled" here for compatibility with Java swh-graph
1106    if graphclass != "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph" {
1107        bail!(
1108            "Expected graphclass in {} to be \"it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph\", got {:?}",
1109            properties_path.display(),
1110            graphclass
1111        );
1112    }
1113
1114    let labelspec = map
1115        .get("labelspec")
1116        .with_context(|| format!("Missing 'labelspec' from {}", properties_path.display()))?;
1117    let width = labelspec
1118        .strip_prefix("org.softwareheritage.graph.labels.SwhLabel(DirEntry,")
1119        .and_then(|labelspec| labelspec.strip_suffix(')'))
1120        .and_then(|labelspec| labelspec.parse::<usize>().ok());
1121    let width = match width {
1122        None =>
1123        bail!("Expected labelspec in {} to be \"org.softwareheritage.graph.labels.SwhLabel(DirEntry,<integer>)\" (where <integer> is a small integer, usually under 30), got {:?}", properties_path.display(), labelspec),
1124        Some(width) => width
1125    };
1126
1127    let labels = crate::labeling::mmap(&base_path, crate::labeling::SwhDeserializer::new(width))
1128        .with_context(|| {
1129            format!(
1130                "Could not load labeling from {}",
1131                base_path.as_ref().display()
1132            )
1133        })?;
1134
1135    Ok(Zip(graph, labels))
1136}