swh_graph/
serde.rs

1// Copyright (C) 2024-2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6//! Serialization and deserialization of (small) graphs using [`serde`]
7
8use std::path::PathBuf;
9
10use serde::de::*;
11use serde::ser::*;
12use serde::*;
13use webgraph::graphs::vec_graph::LabeledVecGraph;
14
15use crate::graph::*;
16use crate::properties;
17use crate::{SwhGraphProperties, SWHID};
18
/// Flat, [`serde`]-friendly representation of a (small) graph and its properties
///
/// The five type parameters are the types of the property tables; when serializing,
/// they are instantiated with references to the tables of the graph being written.
#[derive(Serialize, Deserialize)]
struct SerializedGraph<Contents, LabelNames, Persons, Strings, Timestamps> {
    /// SWHID of every node, indexed by node id
    swhids: Vec<SWHID>,
    /// The `contents` property table of the graph
    contents: Contents,
    /// The `label_names` property table of the graph
    label_names: LabelNames,
    /// The `persons` property table of the graph
    persons: Persons,
    /// The `strings` property table of the graph
    strings: Strings,
    /// The `timestamps` property table of the graph
    timestamps: Timestamps,
    /// `node_id -> (node_id, vec![label])`
    ///
    /// The outer vector is indexed by source node id; each entry pairs a successor's
    /// node id with the raw `u64` values of the labels on that arc.
    arcs: Vec<Vec<(usize, Vec<u64>)>>,
}
30
31/// Serializes a (small) graph using [`serde`] instead of the normal serialization
32pub fn serialize_with_labels_and_maps<
33    S: Serializer,
34    G: SwhLabeledForwardGraph + SwhGraphWithProperties,
35>(
36    serializer: S,
37    graph: &G,
38) -> Result<S::Ok, S::Error>
39where
40    <G as SwhGraphWithProperties>::Maps: properties::Maps,
41    <G as SwhGraphWithProperties>::Contents: serde::Serialize,
42    <G as SwhGraphWithProperties>::LabelNames: serde::Serialize,
43    <G as SwhGraphWithProperties>::Persons: serde::Serialize,
44    <G as SwhGraphWithProperties>::Strings: serde::Serialize,
45    <G as SwhGraphWithProperties>::Timestamps: serde::Serialize,
46{
47    SerializedGraph {
48        swhids: (0..graph.num_nodes())
49            .map(|node| graph.properties().swhid(node))
50            .collect(),
51        contents: &graph.properties().contents,
52        label_names: &graph.properties().label_names,
53        persons: &graph.properties().persons,
54        strings: &graph.properties().strings,
55        timestamps: &graph.properties().timestamps,
56        arcs: (0..graph.num_nodes())
57            .map(|node| {
58                graph
59                    .untyped_labeled_successors(node)
60                    .into_iter()
61                    .map(|(succ, labels)| (succ, labels.into_iter().map(|label| label.0).collect()))
62                    .collect()
63            })
64            .collect(),
65    }
66    .serialize(serializer)
67}
68
69#[allow(clippy::type_complexity)]
70/// Deserializes a (small) graph using [`serde`] instead of the normal deserialization, and
71/// returns a fully in-memory graph, as if built by
72/// [`GraphBuilder`](crate::graph_builder::GraphBuilder)
73///
74/// The `path` is not read, but is used to set the return value of [`SwhBidirectionalGraph::path`].
75pub fn deserialize_with_labels_and_maps<
76    'de,
77    D: Deserializer<'de>,
78    TIMESTAMPS: properties::MaybeTimestamps + Deserialize<'de>,
79    PERSONS: properties::MaybePersons + Deserialize<'de>,
80    CONTENTS: properties::MaybeContents + Deserialize<'de>,
81    STRINGS: properties::MaybeStrings + Deserialize<'de>,
82    LABELNAMES: properties::MaybeLabelNames + Deserialize<'de>,
83>(
84    deserializer: D,
85    path: PathBuf,
86) -> Result<
87    SwhBidirectionalGraph<
88        SwhGraphProperties<properties::VecMaps, TIMESTAMPS, PERSONS, CONTENTS, STRINGS, LABELNAMES>,
89        LabeledVecGraph<Vec<u64>>,
90        LabeledVecGraph<Vec<u64>>,
91    >,
92    /* XXX: I'd like to return this instead:
93    SwhBidirectionalGraph<
94        SwhGraphProperties<
95            impl properties::Maps + Send + Sync + 'static,
96            TIMESTAMPS,
97            PERSONS,
98            CONTENTS,
99            STRINGS,
100            LABELNAMES,
101        >,
102        impl UnderlyingGraph + Send + Sync + 'static,
103        impl UnderlyingGraph + Send + Sync + 'static,
104    >,
105    but it makes this function pretty hard to use, as return-position impls capture D's lifetime
106    despite + 'static. See https://github.com/rust-lang/rust/issues/132364
107    */
108    D::Error,
109> {
110    let graph: SerializedGraph<_, _, _, _, _> = SerializedGraph::deserialize(deserializer)?;
111    let forward_arcs: Vec<(NodeId, NodeId, Vec<u64>)> = graph
112        .arcs
113        .iter()
114        .enumerate()
115        .flat_map(|(src, arcs)| {
116            arcs.iter()
117                .map(move |(dst, labels)| (src, *dst, labels.clone()))
118        })
119        .collect();
120    let backward_arcs: Vec<(NodeId, NodeId, Vec<u64>)> = graph
121        .arcs
122        .iter()
123        .enumerate()
124        .flat_map(|(src, arcs)| {
125            arcs.iter()
126                .map(move |(dst, labels)| (*dst, src, labels.clone()))
127        })
128        .collect();
129    Ok(SwhBidirectionalGraph::from_underlying_graphs(
130        path,
131        LabeledVecGraph::from_arcs(forward_arcs),
132        LabeledVecGraph::from_arcs(backward_arcs),
133    )
134    .init_properties()
135    .load_properties(move |properties| {
136        Ok(properties
137            .with_maps(properties::VecMaps::new(graph.swhids))
138            .expect("Could not join maps")
139            .with_contents(graph.contents)
140            .expect("Could not join VecContents")
141            .with_label_names(graph.label_names)
142            .expect("Could not join maps")
143            .with_persons(graph.persons)
144            .expect("Could not join persons")
145            .with_strings(graph.strings)
146            .expect("Could not join strings")
147            .with_timestamps(graph.timestamps)
148            .expect("Could not join timestamps"))
149    })
150    .expect("Could not load properties"))
151}