Skip to main content

swh_graph/
graph.rs

1// Copyright (C) 2023-2026  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Structures to manipulate the Software Heritage graph
7//!
8//! In order to load only what is necessary, these structures are initially created
9//! by calling [`SwhUnidirectionalGraph::new`] or [`SwhBidirectionalGraph::new`], then calling methods
10//! on them to progressively load additional data (`load_properties`, `load_all_properties`,
11//! `load_labels`)
12
13#![allow(clippy::type_complexity)]
14
15use std::borrow::Borrow;
16use std::collections::HashMap;
17use std::iter::Iterator;
18use std::path::{Path, PathBuf};
19
20use anyhow::{anyhow, bail, Context, Result};
21use dsi_progress_logger::{ConcurrentProgressLog, ProgressLog};
22use rayon::prelude::*;
23use webgraph::graphs::vec_graph::LabeledVecGraph;
24use webgraph::prelude::*;
25
26use crate::arc_iterators::{
27    DelabelingIterator, LabelTypingSuccessorIterator, LabeledArcIterator, LabeledSuccessorIterator,
28};
29// Useful trait to import as part of `use swh_graph::graph::*;`
30pub use crate::arc_iterators::IntoFlattenedLabeledArcsIterator;
31use crate::labeling::SwhLabeling;
32use crate::labels::{EdgeLabel, UntypedEdgeLabel};
33use crate::mph::LoadableSwhidMphf;
34use crate::properties;
35pub use crate::underlying_graph::DefaultUnderlyingGraph;
36use crate::utils::shuffle::par_iter_shuffled_range;
37use crate::utils::suffix_path;
38use crate::NodeType;
39
40/// Alias for [`usize`], which may become a newtype in a future version.
41pub type NodeId = usize;
42
43/// Supertrait of [`RandomAccessLabeling`] with methods to act like a [`RandomAccessGraph`].
44///
45/// If `Self` implements [`RandomAccessGraph`], then this is implemented as no-ops.
46/// Otherwise, it "unpeels" layers of zipping until it reaches the graph at the bottom:
47///
48/// - If `Self` is `Zip<L, R>`, this defers to `L` (aka. `Left<Zip<L, R>`).
49/// - If `Self` is `VecGraph` or `LabeledVecGraph<_>, this does the equivalent of deferring to `VecGraph`
50///   (through [`DelabelingIterator`] because it cannot create a new `VecGraph` without copying)
51pub trait UnderlyingGraph: RandomAccessLabeling {
52    type UnlabeledSuccessors<'succ>: IntoIterator<Item = NodeId>
53    where
54        Self: 'succ;
55
56    /// Workaround for some implementations of `<Self as RandomAccessLabeling>::num_arcs`
57    /// being missing
58    ///
59    /// `Zip::num_arcs` runs `assert_eq!(self.0.num_arcs(), self.1.num_arcs());`
60    /// but `BitStreamLabeling::num_arcs` always panics as of
61    /// f460742fe0f776df2248a5f09a3425b81eb70b07, so we can't use that.
62    fn num_arcs(&self) -> u64;
63    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
64    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_>;
65}
66
67impl<F: RandomAccessDecoderFactory> UnderlyingGraph for BvGraph<F> {
68    type UnlabeledSuccessors<'succ>
69        = <Self as RandomAccessLabeling>::Labels<'succ>
70    where
71        Self: 'succ;
72
73    fn num_arcs(&self) -> u64 {
74        <Self as RandomAccessLabeling>::num_arcs(self)
75    }
76    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
77        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
78    }
79    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
80        <Self as RandomAccessGraph>::successors(self, node_id)
81    }
82}
83
84impl<G: UnderlyingGraph, L: RandomAccessLabeling> UnderlyingGraph for Zip<G, L> {
85    type UnlabeledSuccessors<'succ>
86        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
87    where
88        Self: 'succ;
89
90    fn num_arcs(&self) -> u64 {
91        <G as UnderlyingGraph>::num_arcs(&self.0)
92    }
93    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
94        self.0.has_arc(src_node_id, dst_node_id)
95    }
96    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
97        self.0.unlabeled_successors(node_id)
98    }
99}
100
101impl UnderlyingGraph for VecGraph {
102    type UnlabeledSuccessors<'succ>
103        = <Self as RandomAccessLabeling>::Labels<'succ>
104    where
105        Self: 'succ;
106
107    fn num_arcs(&self) -> u64 {
108        <Self as RandomAccessLabeling>::num_arcs(self)
109    }
110    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
111        <Self as RandomAccessGraph>::has_arc(self, src_node_id, dst_node_id)
112    }
113    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
114        <Self as RandomAccessGraph>::successors(self, node_id)
115    }
116}
117
118impl<L: Clone> UnderlyingGraph for LabeledVecGraph<L> {
119    type UnlabeledSuccessors<'succ>
120        = DelabelingIterator<<Self as RandomAccessLabeling>::Labels<'succ>>
121    where
122        Self: 'succ;
123
124    fn num_arcs(&self) -> u64 {
125        <Self as RandomAccessLabeling>::num_arcs(self)
126    }
127    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
128        for succ in self.unlabeled_successors(src_node_id) {
129            if succ == dst_node_id {
130                return true;
131            }
132        }
133        false
134    }
135    fn unlabeled_successors(&self, node_id: NodeId) -> Self::UnlabeledSuccessors<'_> {
136        DelabelingIterator {
137            successors: <Self as RandomAccessLabeling>::labels(self, node_id),
138        }
139    }
140}
141
142pub trait SwhGraph {
143    /// Return the base path of the graph
144    fn path(&self) -> &Path;
145
146    /// Returns whether the graph is in the `ori->snp->rel,rev->dir->cnt` direction
147    /// (with a few `dir->rev` arcs)
148    fn is_transposed(&self) -> bool;
149
150    /// Return the largest node id in the graph plus one.
151    ///
152    /// For graphs directly loaded from disk this is the actual number of nodes,
153    /// but it is an overestimate for some graphs (eg. [`Subgraph`](crate::views::Subgraph)).
154    ///
155    /// Use [`actual_num_nodes`](Self::actual_num_nodes) if you need the exact number of
156    /// nodes in the graph.
157    fn num_nodes(&self) -> usize;
158
159    /// Returns whether the given node id exists in the graph
160    ///
161    /// This is usually true iff `node_id < self.num_nodes()`, but may be false
162    /// when using a filtered view such as [`Subgraph`](crate::views::Subgraph).
163    fn has_node(&self, node_id: NodeId) -> bool {
164        node_id < self.num_nodes()
165    }
166
167    /// Return the number of arcs in the graph.
168    fn num_arcs(&self) -> u64;
169
170    /// Returns the number of nodes of each type, if known.
171    fn num_nodes_by_type(&self) -> Result<HashMap<NodeType, usize>> {
172        let path = self.path().with_extension("nodes.stats.txt");
173        std::fs::read_to_string(&path)
174            .with_context(|| format!("Could not read {}", path.display()))?
175            .lines()
176            .map(|line| {
177                let (type_, count) = line
178                    .split_once(' ')
179                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
180                let type_: NodeType = type_
181                    .parse()
182                    .map_err(|_| anyhow!("Unknown node type in {}: {}", path.display(), type_))?;
183                let count = count
184                    .parse()
185                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
186                Ok((type_, count))
187            })
188            .collect()
189    }
190    /// Returns the number of nodes in the graph, if known.
191    fn actual_num_nodes(&self) -> Result<usize> {
192        Ok(self.num_nodes_by_type()?.values().sum())
193    }
194    /// Returns the number of arcs of each type, if known.
195    fn num_arcs_by_type(&self) -> Result<HashMap<(NodeType, NodeType), usize>> {
196        let path = self.path().with_extension("edges.stats.txt");
197        std::fs::read_to_string(&path)
198            .with_context(|| format!("Could not read {}", path.display()))?
199            .lines()
200            .map(|line| {
201                let (type_, count) = line
202                    .split_once(' ')
203                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
204                let (src_type, dst_type) = type_
205                    .split_once(':')
206                    .with_context(|| format!("Unexpected line in {}: {}", path.display(), line))?;
207                let src_type: NodeType = src_type.parse().map_err(|_| {
208                    anyhow!("Unknown node type in {}: {}", path.display(), src_type)
209                })?;
210                let dst_type: NodeType = dst_type.parse().map_err(|_| {
211                    anyhow!("Unknown node type in {}: {}", path.display(), dst_type)
212                })?;
213                let count = count
214                    .parse()
215                    .map_err(|_| anyhow!("Invalid integer in {}: {}", path.display(), count))?;
216                Ok(((src_type, dst_type), count))
217            })
218            .collect()
219    }
220
221    /// Return whether there is an arc going from `src_node_id` to `dst_node_id`.
222    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool;
223
224    /// Returns an iterator on all the nodes
225    ///
226    /// Order is not guaranteed.
227    ///
228    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
229    /// even those that are filtered out.
230    fn iter_nodes<'a>(
231        &'a self,
232        mut pl: impl ProgressLog + 'a,
233    ) -> impl Iterator<Item = NodeId> + 'a {
234        (0..self.num_nodes())
235            .inspect(move |_| pl.light_update())
236            .filter(|&node_id| self.has_node(node_id))
237    }
238    /// Returns a parallel iterator on all the nodes
239    ///
240    /// Order is not guaranteed.
241    ///
242    /// Updates the progress logger on every node id from 0 to `self.num_nodes()`,
243    /// even those that are filtered out.
244    fn par_iter_nodes<'a>(
245        &'a self,
246        pl: impl ConcurrentProgressLog + 'a,
247    ) -> impl ParallelIterator<Item = NodeId> + 'a
248    where
249        Self: Sync,
250    {
251        par_iter_shuffled_range(0..self.num_nodes())
252            .map_with(pl, move |pl, node_id| {
253                pl.update();
254                node_id
255            })
256            .filter(|&node_id| self.has_node(node_id))
257    }
258}
259
260pub trait SwhForwardGraph: SwhGraph {
261    type Successors<'succ>: IntoIterator<Item = usize>
262    where
263        Self: 'succ;
264
265    /// Return an [`IntoIterator`] over the successors of a node.
266    fn successors(&self, node_id: NodeId) -> Self::Successors<'_>;
267    /// Return the number of successors of a node.
268    fn outdegree(&self, node_id: NodeId) -> usize;
269}
270
271#[diagnostic::on_unimplemented(
272    label = "does not have forward labels loaded",
273    note = "Use `let graph = graph.load_labels()` to load them"
274)]
275pub trait SwhLabeledForwardGraph:
276    SwhForwardGraph + SwhGraphWithProperties<Maps: properties::Maps>
277{
278    type LabeledArcs<'arc>: Iterator<Item = UntypedEdgeLabel>
279    where
280        Self: 'arc;
281    type LabeledSuccessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
282        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
283    where
284        Self: 'node;
285
286    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
287    /// of each arc
288    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_>;
289
290    /// Return an [`IntoIterator`] over the successors of a node along with a list of labels
291    /// of each arc
292    fn labeled_successors(
293        &self,
294        node_id: NodeId,
295    ) -> impl Iterator<Item = (usize, impl Iterator<Item = EdgeLabel>)>
296           + IntoFlattenedLabeledArcsIterator<EdgeLabel>
297           + '_ {
298        LabelTypingSuccessorIterator {
299            graph: self,
300            is_transposed: false,
301            src: node_id,
302            successors: self.untyped_labeled_successors(node_id).into_iter(),
303        }
304    }
305}
306
307#[diagnostic::on_unimplemented(
308    label = "does not have backward arcs loaded",
309    note = "Use SwhBidirectionalGraph instead of SwhUnidirectionalGraph if you don't already"
310)]
311pub trait SwhBackwardGraph: SwhGraph {
312    type Predecessors<'succ>: IntoIterator<Item = usize>
313    where
314        Self: 'succ;
315
316    /// Return an [`IntoIterator`] over the predecessors of a node.
317    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_>;
318    /// Return the number of predecessors of a node.
319    fn indegree(&self, node_id: NodeId) -> usize;
320}
321
322#[diagnostic::on_unimplemented(
323    label = "does not have backward labels loaded",
324    note = "Use SwhBidirectionalGraph instead of SwhUnidirectionalGraph if you don't already",
325    note = "and `let graph = graph.load_labels()`"
326)]
327pub trait SwhLabeledBackwardGraph:
328    SwhBackwardGraph + SwhGraphWithProperties<Maps: properties::Maps>
329{
330    type LabeledArcs<'arc>: Iterator<Item = UntypedEdgeLabel>
331    where
332        Self: 'arc;
333    type LabeledPredecessors<'node>: IntoIterator<Item = (usize, Self::LabeledArcs<'node>)>
334        + IntoFlattenedLabeledArcsIterator<UntypedEdgeLabel>
335    where
336        Self: 'node;
337
338    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
339    /// of each arc
340    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_>;
341
342    /// Return an [`IntoIterator`] over the predecessors of a node along with a list of labels
343    /// of each arc
344    fn labeled_predecessors(
345        &self,
346        node_id: NodeId,
347    ) -> impl IntoIterator<Item = (usize, impl Iterator<Item = crate::labels::EdgeLabel>)>
348           + IntoFlattenedLabeledArcsIterator<EdgeLabel>
349           + '_ {
350        LabelTypingSuccessorIterator {
351            graph: self,
352            is_transposed: true,
353            src: node_id,
354            successors: self.untyped_labeled_predecessors(node_id).into_iter(),
355        }
356    }
357}
358
359pub trait SwhGraphWithProperties: SwhGraph {
360    type Maps: properties::MaybeMaps;
361    type Timestamps: properties::MaybeTimestamps;
362    type Persons: properties::MaybePersons;
363    type Contents: properties::MaybeContents;
364    type Strings: properties::MaybeStrings;
365    type LabelNames: properties::MaybeLabelNames;
366
367    fn properties(
368        &self,
369    ) -> &properties::SwhGraphProperties<
370        Self::Maps,
371        Self::Timestamps,
372        Self::Persons,
373        Self::Contents,
374        Self::Strings,
375        Self::LabelNames,
376    >;
377}
378
379/// Alias for structures representing a graph with all arcs, arc labels, and node properties
380/// loaded.
381pub trait SwhFullGraph:
382    SwhLabeledForwardGraph
383    + SwhLabeledBackwardGraph
384    + SwhGraphWithProperties<
385        Maps: properties::Maps,
386        Timestamps: properties::Timestamps,
387        Persons: properties::Persons,
388        Contents: properties::Contents,
389        Strings: properties::Strings,
390        LabelNames: properties::LabelNames,
391    >
392{
393}
394
395impl<
396        G: SwhLabeledForwardGraph
397            + SwhLabeledBackwardGraph
398            + SwhGraphWithProperties<
399                Maps: properties::Maps,
400                Timestamps: properties::Timestamps,
401                Persons: properties::Persons,
402                Contents: properties::Contents,
403                Strings: properties::Strings,
404                LabelNames: properties::LabelNames,
405            >,
406    > SwhFullGraph for G
407{
408}
409
410/// Class representing the compressed Software Heritage graph in a single direction.
411///
412/// Type parameters:
413///
414/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
415///   [`load_properties`](SwhUnidirectionalGraph::load_properties) and
416///   [`load_all_properties`](SwhUnidirectionalGraph::load_all_properties)
417/// * G is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
418///   [`load_labels`](SwhUnidirectionalGraph::load_labels)
419pub struct SwhUnidirectionalGraph<P, G: UnderlyingGraph = DefaultUnderlyingGraph> {
420    basepath: PathBuf,
421    graph: G,
422    properties: P,
423}
424
425impl SwhUnidirectionalGraph<()> {
426    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
427        let basepath = basepath.as_ref().to_owned();
428        let graph = DefaultUnderlyingGraph::new(&basepath)?;
429        Ok(Self::from_underlying_graph(basepath, graph))
430    }
431}
432
433impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
434    pub fn from_underlying_graph(basepath: PathBuf, graph: G) -> Self {
435        SwhUnidirectionalGraph {
436            basepath,
437            graph,
438            properties: (),
439        }
440    }
441}
442
443impl<P, G: UnderlyingGraph> SwhGraph for SwhUnidirectionalGraph<P, G> {
444    fn path(&self) -> &Path {
445        self.basepath.as_path()
446    }
447
448    fn is_transposed(&self) -> bool {
449        // Technically, users can load the 'graph-transposed' directly.
450        // However, unless they rename files, this will fail to load properties, because
451        // properties' file names wouldn't match the base path.
452        // As 'is_transposed' is only useful when checking node types (of an arc),
453        // this function is unlikely to be called in that scenario, so this should be fine.
454        false
455    }
456
457    fn num_nodes(&self) -> usize {
458        self.graph.num_nodes()
459    }
460
461    fn num_arcs(&self) -> u64 {
462        UnderlyingGraph::num_arcs(&self.graph)
463    }
464
465    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
466        self.graph.has_arc(src_node_id, dst_node_id)
467    }
468}
469
470impl<P, G: UnderlyingGraph> SwhForwardGraph for SwhUnidirectionalGraph<P, G> {
471    type Successors<'succ>
472        = <G as UnderlyingGraph>::UnlabeledSuccessors<'succ>
473    where
474        Self: 'succ;
475
476    /// Return an [`IntoIterator`] over the successors of a node.
477    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
478        self.graph.unlabeled_successors(node_id)
479    }
480
481    /// Return the number of successors of a node.
482    fn outdegree(&self, node_id: NodeId) -> usize {
483        self.graph.outdegree(node_id)
484    }
485}
486
487impl<P, G: UnderlyingGraph> SwhLabeledForwardGraph for SwhUnidirectionalGraph<P, G>
488where
489    <G as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
490    for<'succ> <G as RandomAccessLabeling>::Labels<'succ>:
491        Iterator<Item = (usize, <<G as SequentialLabeling>::Label as Pair>::Right)>,
492    Self: SwhGraphWithProperties<Maps: properties::Maps>,
493{
494    type LabeledArcs<'arc> = LabeledArcIterator<<<<<G as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
495    type LabeledSuccessors<'succ>
496        = LabeledSuccessorIterator<<G as RandomAccessLabeling>::Labels<'succ>>
497    where
498        Self: 'succ;
499
500    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
501        LabeledSuccessorIterator {
502            successors: self.graph.labels(node_id),
503        }
504    }
505}
506
507impl<
508        M: properties::MaybeMaps,
509        T: properties::MaybeTimestamps,
510        P: properties::MaybePersons,
511        C: properties::MaybeContents,
512        S: properties::MaybeStrings,
513        N: properties::MaybeLabelNames,
514        G: UnderlyingGraph,
515    > SwhUnidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, G>
516{
517    /// Enriches the graph with more properties mmapped from disk
518    ///
519    /// # Example
520    ///
521    /// ```no_run
522    /// # use std::path::PathBuf;
523    /// use swh_graph::java_compat::mph::gov::GOVMPH;
524    ///
525    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
526    ///     .expect("Could not load graph")
527    ///     .init_properties()
528    ///     .load_properties(|properties| properties.load_maps::<GOVMPH>())
529    ///     .expect("Could not load SWHID maps")
530    ///     .load_properties(|properties| properties.load_timestamps())
531    ///     .expect("Could not load timestamps");
532    /// ```
533    pub fn load_properties<
534        M2: properties::MaybeMaps,
535        T2: properties::MaybeTimestamps,
536        P2: properties::MaybePersons,
537        C2: properties::MaybeContents,
538        S2: properties::MaybeStrings,
539        N2: properties::MaybeLabelNames,
540    >(
541        self,
542        loader: impl FnOnce(
543            properties::SwhGraphProperties<M, T, P, C, S, N>,
544        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
545    ) -> Result<SwhUnidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, G>>
546    {
547        Ok(SwhUnidirectionalGraph {
548            properties: loader(self.properties)?,
549            basepath: self.basepath,
550            graph: self.graph,
551        })
552    }
553}
554
555impl<G: UnderlyingGraph> SwhUnidirectionalGraph<(), G> {
556    /// Prerequisite for `load_properties`
557    pub fn init_properties(
558        self,
559    ) -> SwhUnidirectionalGraph<
560        properties::SwhGraphProperties<
561            properties::NoMaps,
562            properties::NoTimestamps,
563            properties::NoPersons,
564            properties::NoContents,
565            properties::NoStrings,
566            properties::NoLabelNames,
567        >,
568        G,
569    > {
570        SwhUnidirectionalGraph {
571            properties: properties::SwhGraphProperties::new(&self.basepath, self.graph.num_nodes()),
572            basepath: self.basepath,
573            graph: self.graph,
574        }
575    }
576
577    /// Enriches the graph with all properties, mmapped from disk
578    ///
579    /// # Example
580    ///
581    /// ```no_run
582    /// # use std::path::PathBuf;
583    /// use swh_graph::java_compat::mph::gov::GOVMPH;
584    ///
585    /// swh_graph::graph::SwhUnidirectionalGraph::new(PathBuf::from("./graph"))
586    ///     .expect("Could not load graph")
587    ///     .load_all_properties::<GOVMPH>()
588    ///     .expect("Could not load properties");
589    /// ```
590    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
591        self,
592    ) -> Result<
593        SwhUnidirectionalGraph<
594            properties::SwhGraphProperties<
595                properties::MappedMaps<MPHF>,
596                properties::MappedTimestamps,
597                properties::MappedPersons,
598                properties::MappedContents,
599                properties::MappedStrings,
600                properties::MappedLabelNames,
601            >,
602            G,
603        >,
604    > {
605        self.init_properties()
606            .load_properties(|properties| properties.load_all())
607    }
608}
609
610impl<
611        MAPS: properties::MaybeMaps,
612        TIMESTAMPS: properties::MaybeTimestamps,
613        PERSONS: properties::MaybePersons,
614        CONTENTS: properties::MaybeContents,
615        STRINGS: properties::MaybeStrings,
616        LABELNAMES: properties::MaybeLabelNames,
617        G: UnderlyingGraph,
618    > SwhGraphWithProperties
619    for SwhUnidirectionalGraph<
620        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
621        G,
622    >
623{
624    type Maps = MAPS;
625    type Timestamps = TIMESTAMPS;
626    type Persons = PERSONS;
627    type Contents = CONTENTS;
628    type Strings = STRINGS;
629    type LabelNames = LABELNAMES;
630
631    fn properties(
632        &self,
633    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
634    {
635        &self.properties
636    }
637}
638
639impl<P, G: RandomAccessGraph + UnderlyingGraph> SwhUnidirectionalGraph<P, G> {
640    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
641    pub fn load_labels(self) -> Result<SwhUnidirectionalGraph<P, Zip<G, SwhLabeling>>> {
642        Ok(SwhUnidirectionalGraph {
643            // Note: keep british version of "labelled" here for compatibility with Java swh-graph
644            graph: zip_labels(self.graph, suffix_path(&self.basepath, "-labelled"))
645                .context("Could not load forward labels")?,
646            properties: self.properties,
647            basepath: self.basepath,
648        })
649    }
650}
651
652/// Class representing the compressed Software Heritage graph in both directions.
653///
654/// Type parameters:
655///
656/// * `P` is either `()` or `properties::SwhGraphProperties`, manipulated using
657///   [`load_properties`](SwhBidirectionalGraph::load_properties) and
658///   [`load_all_properties`](SwhBidirectionalGraph::load_all_properties)
659/// * FG is the forward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
660///   after using [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels)
661/// * BG is the backward graph (either [`BvGraph`], or `Zip<BvGraph, SwhLabeling>`
662///   after using [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
663pub struct SwhBidirectionalGraph<
664    P,
665    FG: UnderlyingGraph = DefaultUnderlyingGraph,
666    BG: UnderlyingGraph = DefaultUnderlyingGraph,
667> {
668    basepath: PathBuf,
669    forward_graph: FG,
670    backward_graph: BG,
671    properties: P,
672}
673
674impl SwhBidirectionalGraph<()> {
675    pub fn new(basepath: impl AsRef<Path>) -> Result<Self> {
676        let basepath = basepath.as_ref().to_owned();
677        let forward_graph = DefaultUnderlyingGraph::new(&basepath)?;
678        let backward_graph = DefaultUnderlyingGraph::new(suffix_path(&basepath, "-transposed"))?;
679        Ok(Self::from_underlying_graphs(
680            basepath,
681            forward_graph,
682            backward_graph,
683        ))
684    }
685}
686
687impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
688    pub fn from_underlying_graphs(
689        basepath: PathBuf,
690        forward_graph: FG,
691        backward_graph: BG,
692    ) -> Self {
693        SwhBidirectionalGraph {
694            basepath,
695            backward_graph,
696            forward_graph,
697            properties: (),
698        }
699    }
700}
701
702impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhGraph for SwhBidirectionalGraph<P, FG, BG> {
703    fn path(&self) -> &Path {
704        self.basepath.as_path()
705    }
706
707    fn is_transposed(&self) -> bool {
708        // Technically, users can load the 'graph-transposed' directly.
709        // However, unless they rename files, this will fail to load properties, because
710        // properties' file names wouldn't match the base path.
711        // As 'is_transposed' is only useful when checking node types (of an arc),
712        // this function is unlikely to be called in that scenario, so this should be fine.
713        false
714    }
715
716    fn num_nodes(&self) -> usize {
717        self.forward_graph.num_nodes()
718    }
719
720    fn num_arcs(&self) -> u64 {
721        UnderlyingGraph::num_arcs(&self.forward_graph)
722    }
723
724    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
725        self.forward_graph.has_arc(src_node_id, dst_node_id)
726    }
727}
728
729impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhForwardGraph
730    for SwhBidirectionalGraph<P, FG, BG>
731{
732    type Successors<'succ>
733        = <FG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
734    where
735        Self: 'succ;
736
737    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
738        self.forward_graph.unlabeled_successors(node_id)
739    }
740    fn outdegree(&self, node_id: NodeId) -> usize {
741        self.forward_graph.outdegree(node_id)
742    }
743}
744
745impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledForwardGraph
746    for SwhBidirectionalGraph<P, FG, BG>
747where
748    <FG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
749    for<'succ> <FG as RandomAccessLabeling>::Labels<'succ>:
750        Iterator<Item = (usize, <<FG as SequentialLabeling>::Label as Pair>::Right)>,
751    Self: SwhGraphWithProperties<Maps: properties::Maps>,
752{
753    type LabeledArcs<'arc> = LabeledArcIterator<<<<<FG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
754    type LabeledSuccessors<'succ>
755        = LabeledSuccessorIterator<<FG as RandomAccessLabeling>::Labels<'succ>>
756    where
757        Self: 'succ;
758
759    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
760        LabeledSuccessorIterator {
761            successors: self.forward_graph.labels(node_id),
762        }
763    }
764}
765
766impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBackwardGraph
767    for SwhBidirectionalGraph<P, FG, BG>
768{
769    type Predecessors<'succ>
770        = <BG as UnderlyingGraph>::UnlabeledSuccessors<'succ>
771    where
772        Self: 'succ;
773
774    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_> {
775        self.backward_graph.unlabeled_successors(node_id)
776    }
777
778    fn indegree(&self, node_id: NodeId) -> usize {
779        self.backward_graph.outdegree(node_id)
780    }
781}
782
783impl<P, FG: UnderlyingGraph, BG: UnderlyingGraph> SwhLabeledBackwardGraph
784    for SwhBidirectionalGraph<P, FG, BG>
785where
786    <BG as SequentialLabeling>::Label: Pair<Left = NodeId, Right: IntoIterator<Item: Borrow<u64>>>,
787    for<'succ> <BG as RandomAccessLabeling>::Labels<'succ>:
788        Iterator<Item = (usize, <<BG as SequentialLabeling>::Label as Pair>::Right)>,
789    Self: SwhGraphWithProperties<Maps: properties::Maps>,
790{
791    type LabeledArcs<'arc> = LabeledArcIterator<<<<<BG as RandomAccessLabeling>::Labels<'arc> as Iterator>::Item as Pair>::Right as IntoIterator>::IntoIter> where Self: 'arc;
792    type LabeledPredecessors<'succ>
793        = LabeledSuccessorIterator<<BG as RandomAccessLabeling>::Labels<'succ>>
794    where
795        Self: 'succ;
796
797    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_> {
798        LabeledSuccessorIterator {
799            successors: self.backward_graph.labels(node_id),
800        }
801    }
802}
803
804impl<
805        M: properties::MaybeMaps,
806        T: properties::MaybeTimestamps,
807        P: properties::MaybePersons,
808        C: properties::MaybeContents,
809        S: properties::MaybeStrings,
810        N: properties::MaybeLabelNames,
811        BG: UnderlyingGraph,
812        FG: UnderlyingGraph,
813    > SwhBidirectionalGraph<properties::SwhGraphProperties<M, T, P, C, S, N>, FG, BG>
814{
815    /// Enriches the graph with more properties mmapped from disk
816    ///
817    /// # Example
818    ///
819    /// ```no_run
820    /// # use std::path::PathBuf;
821    /// use swh_graph::java_compat::mph::gov::GOVMPH;
822    /// use swh_graph::SwhGraphProperties;
823    ///
824    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
825    ///     .expect("Could not load graph")
826    ///     .init_properties()
827    ///     .load_properties(SwhGraphProperties::load_maps::<GOVMPH>)
828    ///     .expect("Could not load SWHID maps")
829    ///     .load_properties(SwhGraphProperties::load_timestamps)
830    ///     .expect("Could not load timestamps");
831    /// ```
832    pub fn load_properties<
833        M2: properties::MaybeMaps,
834        T2: properties::MaybeTimestamps,
835        P2: properties::MaybePersons,
836        C2: properties::MaybeContents,
837        S2: properties::MaybeStrings,
838        N2: properties::MaybeLabelNames,
839    >(
840        self,
841        loader: impl FnOnce(
842            properties::SwhGraphProperties<M, T, P, C, S, N>,
843        ) -> Result<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>>,
844    ) -> Result<SwhBidirectionalGraph<properties::SwhGraphProperties<M2, T2, P2, C2, S2, N2>, FG, BG>>
845    {
846        Ok(SwhBidirectionalGraph {
847            properties: loader(self.properties)?,
848            basepath: self.basepath,
849            forward_graph: self.forward_graph,
850            backward_graph: self.backward_graph,
851        })
852    }
853}
854
855impl<FG: UnderlyingGraph, BG: UnderlyingGraph> SwhBidirectionalGraph<(), FG, BG> {
856    /// Prerequisite for `load_properties`
857    pub fn init_properties(
858        self,
859    ) -> SwhBidirectionalGraph<
860        properties::SwhGraphProperties<
861            properties::NoMaps,
862            properties::NoTimestamps,
863            properties::NoPersons,
864            properties::NoContents,
865            properties::NoStrings,
866            properties::NoLabelNames,
867        >,
868        FG,
869        BG,
870    > {
871        SwhBidirectionalGraph {
872            properties: properties::SwhGraphProperties::new(
873                &self.basepath,
874                self.forward_graph.num_nodes(),
875            ),
876            basepath: self.basepath,
877            forward_graph: self.forward_graph,
878            backward_graph: self.backward_graph,
879        }
880    }
881
882    /// Enriches the graph with more properties mmapped from disk
883    ///
884    /// # Example
885    ///
886    /// ```no_run
887    /// # use std::path::PathBuf;
888    /// use swh_graph::java_compat::mph::gov::GOVMPH;
889    ///
890    /// swh_graph::graph::SwhBidirectionalGraph::new(PathBuf::from("./graph"))
891    ///     .expect("Could not load graph")
892    ///     .load_all_properties::<GOVMPH>()
893    ///     .expect("Could not load properties");
894    /// ```
895    pub fn load_all_properties<MPHF: LoadableSwhidMphf>(
896        self,
897    ) -> Result<
898        SwhBidirectionalGraph<
899            properties::SwhGraphProperties<
900                properties::MappedMaps<MPHF>,
901                properties::MappedTimestamps,
902                properties::MappedPersons,
903                properties::MappedContents,
904                properties::MappedStrings,
905                properties::MappedLabelNames,
906            >,
907            FG,
908            BG,
909        >,
910    > {
911        self.init_properties()
912            .load_properties(|properties| properties.load_all())
913    }
914}
915
916impl<
917        MAPS: properties::MaybeMaps,
918        TIMESTAMPS: properties::MaybeTimestamps,
919        PERSONS: properties::MaybePersons,
920        CONTENTS: properties::MaybeContents,
921        STRINGS: properties::MaybeStrings,
922        LABELNAMES: properties::MaybeLabelNames,
923        BG: UnderlyingGraph,
924        FG: UnderlyingGraph,
925    > SwhGraphWithProperties
926    for SwhBidirectionalGraph<
927        properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
928        FG,
929        BG,
930    >
931{
932    type Maps = MAPS;
933    type Timestamps = TIMESTAMPS;
934    type Persons = PERSONS;
935    type Contents = CONTENTS;
936    type Strings = STRINGS;
937    type LabelNames = LABELNAMES;
938
939    fn properties(
940        &self,
941    ) -> &properties::SwhGraphProperties<MAPS, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>
942    {
943        &self.properties
944    }
945}
946
947impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: UnderlyingGraph>
948    SwhBidirectionalGraph<P, FG, BG>
949{
950    /// Consumes this graph and returns a new one that implements [`SwhLabeledForwardGraph`]
951    pub fn load_forward_labels(self) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, BG>> {
952        Ok(SwhBidirectionalGraph {
953            forward_graph: zip_labels(self.forward_graph, suffix_path(&self.basepath, "-labelled"))
954                .context("Could not load forward labels")?,
955            backward_graph: self.backward_graph,
956            properties: self.properties,
957            basepath: self.basepath,
958        })
959    }
960}
961
962impl<P, FG: UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
963    SwhBidirectionalGraph<P, FG, BG>
964{
965    /// Consumes this graph and returns a new one that implements [`SwhLabeledBackwardGraph`]
966    pub fn load_backward_labels(
967        self,
968    ) -> Result<SwhBidirectionalGraph<P, FG, Zip<BG, SwhLabeling>>> {
969        Ok(SwhBidirectionalGraph {
970            forward_graph: self.forward_graph,
971            backward_graph: zip_labels(
972                self.backward_graph,
973                // Note: keep british version of "labelled" here for compatibility with Java swh-graph
974                suffix_path(&self.basepath, "-transposed-labelled"),
975            )
976            .context("Could not load backward labels")?,
977            properties: self.properties,
978            basepath: self.basepath,
979        })
980    }
981}
982
983impl<P, FG: RandomAccessGraph + UnderlyingGraph, BG: RandomAccessGraph + UnderlyingGraph>
984    SwhBidirectionalGraph<P, FG, BG>
985{
986    /// Equivalent to calling both
987    /// [`load_forward_labels`](SwhBidirectionalGraph::load_forward_labels) and
988    /// [`load_backward_labels`](SwhBidirectionalGraph::load_backward_labels)
989    pub fn load_labels(
990        self,
991    ) -> Result<SwhBidirectionalGraph<P, Zip<FG, SwhLabeling>, Zip<BG, SwhLabeling>>> {
992        self.load_forward_labels()
993            .context("Could not load forward labels")?
994            .load_backward_labels()
995            .context("Could not load backward labels")
996    }
997}
998
999#[deprecated(
1000    since = "5.2.0",
1001    note = "please use `SwhUnidirectionalGraph::new` instead"
1002)]
1003/// Deprecated alias for [`SwhUnidirectionalGraph::new`]
1004pub fn load_unidirectional(basepath: impl AsRef<Path>) -> Result<SwhUnidirectionalGraph<()>> {
1005    SwhUnidirectionalGraph::new(basepath)
1006}
1007
1008#[deprecated(
1009    since = "5.2.0",
1010    note = "please use `SwhBidirectionalGraph::new` instead"
1011)]
1012/// Deprecated alias for [`SwhBidirectionalGraph::new`]
1013pub fn load_bidirectional(basepath: impl AsRef<Path>) -> Result<SwhBidirectionalGraph<()>> {
1014    SwhBidirectionalGraph::new(basepath)
1015}
1016
1017/// Loads a bidirectional graph, then all its properties, then its labels.
1018///
1019/// This is a shorthand for:
1020///
1021/// ```no_run
1022/// # use std::path::PathBuf;
1023/// # use anyhow::{Context, Result};
1024/// # use swh_graph::graph::SwhBidirectionalGraph;
1025/// # use swh_graph::mph::LoadableSwhidMphf;
1026/// #
1027/// # fn f<MPHF: LoadableSwhidMphf>() -> Result<()> {
1028/// # let basepath = PathBuf::from("./graph");
1029/// let graph = SwhBidirectionalGraph::new(basepath)
1030///     .context("Could not load graph")?
1031///     .load_all_properties::<MPHF>()
1032///     .context("Could not load properties")?
1033///     .load_labels()
1034///     .context("Could not load labels")?;
1035/// # Ok(())
1036/// # }
1037/// ```
1038///
1039/// When working on code made to be re-distributed, you should load only what is needed
1040/// (ie. load unidirectional if you don't need the backward graph, don't load properties
1041/// that are not needed, don't load labels unless they need to be read), so users can
1042/// run your code without a complete graph locally.
1043pub fn load_full<MPHF: LoadableSwhidMphf>(
1044    basepath: impl AsRef<Path>,
1045) -> Result<
1046    SwhBidirectionalGraph<
1047        properties::SwhGraphProperties<
1048            properties::MappedMaps<MPHF>,
1049            properties::MappedTimestamps,
1050            properties::MappedPersons,
1051            properties::MappedContents,
1052            properties::MappedStrings,
1053            properties::MappedLabelNames,
1054        >,
1055        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1056        Zip<DefaultUnderlyingGraph, SwhLabeling>,
1057    >,
1058> {
1059    SwhBidirectionalGraph::new(basepath)
1060        .context("Could not load graph")?
1061        .load_all_properties()
1062        .context("Could not load properties")?
1063        .load_labels()
1064        .context("Could not load labels")
1065}
1066
1067fn zip_labels<G: RandomAccessGraph + UnderlyingGraph, P: AsRef<Path>>(
1068    graph: G,
1069    base_path: P,
1070) -> Result<Zip<G, SwhLabeling>> {
1071    let properties_path = suffix_path(&base_path, ".properties");
1072    let f = std::fs::File::open(&properties_path)
1073        .with_context(|| format!("Could not open {}", properties_path.display()))?;
1074    let map = java_properties::read(std::io::BufReader::new(f))?;
1075
1076    let graphclass = map
1077        .get("graphclass")
1078        .with_context(|| format!("Missing 'graphclass' from {}", properties_path.display()))?;
1079    // Note: keep british version of "labelled" here for compatibility with Java swh-graph
1080    if graphclass != "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph" {
1081        bail!(
1082            "Expected graphclass in {} to be \"it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph\", got {:?}",
1083            properties_path.display(),
1084            graphclass
1085        );
1086    }
1087
1088    let labelspec = map
1089        .get("labelspec")
1090        .with_context(|| format!("Missing 'labelspec' from {}", properties_path.display()))?;
1091    let width = labelspec
1092        .strip_prefix("org.softwareheritage.graph.labels.SwhLabel(DirEntry,")
1093        .and_then(|labelspec| labelspec.strip_suffix(')'))
1094        .and_then(|labelspec| labelspec.parse::<usize>().ok());
1095    let width = match width {
1096        None =>
1097        bail!("Expected labelspec in {} to be \"org.softwareheritage.graph.labels.SwhLabel(DirEntry,<integer>)\" (where <integer> is a small integer, usually under 30), got {:?}", properties_path.display(), labelspec),
1098        Some(width) => width
1099    };
1100
1101    let labels = crate::labeling::mmap(&base_path, crate::labeling::SwhDeserializer::new(width))
1102        .with_context(|| {
1103            format!(
1104                "Could not load labeling from {}",
1105                base_path.as_ref().display()
1106            )
1107        })?;
1108
1109    Ok(Zip(graph, labels))
1110}