graph_builder/
lib.rs

1#![cfg_attr(has_maybe_uninit_write_slice, feature(maybe_uninit_write_slice))]
2#![cfg_attr(has_new_uninit, feature(new_uninit))]
3#![cfg_attr(has_doc_cfg, feature(doc_cfg))]
4#![cfg_attr(has_slice_partition_dedup, feature(slice_partition_dedup))]
5
6//! A library that can be used as a building block for high-performant graph
7//! algorithms.
8//!
9//! Graph provides implementations for directed and undirected graphs. Graphs
10//! can be created programatically or read from custom input formats in a
11//! type-safe way. The library uses [rayon](https://github.com/rayon-rs/rayon)
12//! to parallelize all steps during graph creation.
13//!
14//! The implementation uses a Compressed-Sparse-Row (CSR) data structure which
15//! is tailored for fast and concurrent access to the graph topology.
16//!
17//! **Note**: The development is mainly driven by
18//! [Neo4j](https://github.com/neo4j/neo4j) developers. However, the library is
19//! __not__ an official product of Neo4j.
20//!
21//! # What is a graph?
22//!
23//! A graph consists of nodes and edges where edges connect exactly two nodes. A
24//! graph can be either directed, i.e., an edge has a source and a target node
25//! or undirected where there is no such distinction.
26//!
27//! In a directed graph, each node `u` has outgoing and incoming neighbors. An
28//! outgoing neighbor of node `u` is any node `v` for which an edge `(u, v)`
29//! exists. An incoming neighbor of node `u` is any node `v` for which an edge
30//! `(v, u)` exists.
31//!
32//! In an undirected graph there is no distinction between source and target
33//! node. A neighbor of node `u` is any node `v` for which either an edge `(u,
34//! v)` or `(v, u)` exists.
35//!
36//! # How to build a graph
37//!
38//! The library provides a builder that can be used to construct a graph from a
39//! given list of edges.
40//!
41//! For example, to create a directed graph that uses `usize` as node
42//! identifier, one can use the builder like so:
43//!
44//! ```
45//! use graph_builder::prelude::*;
46//!
47//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
48//!     .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
49//!     .build();
50//!
51//! assert_eq!(graph.node_count(), 4);
52//! assert_eq!(graph.edge_count(), 5);
53//!
54//! assert_eq!(graph.out_degree(1), 2);
55//! assert_eq!(graph.in_degree(1), 1);
56//!
57//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
58//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
59//! ```
60//!
61//! To build an undirected graph using `u32` as node identifer, we only need to
62//! change the expected types:
63//!
64//! ```
65//! use graph_builder::prelude::*;
66//!
67//! let graph: UndirectedCsrGraph<u32> = GraphBuilder::new()
68//!     .csr_layout(CsrLayout::Sorted)
69//!     .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
70//!     .build();
71//!
72//! assert_eq!(graph.node_count(), 4);
73//! assert_eq!(graph.edge_count(), 5);
74//!
75//! assert_eq!(graph.degree(1), 3);
76//!
77//! assert_eq!(graph.neighbors(1).as_slice(), &[0, 2, 3]);
78//! ```
79//!
80//! Edges can have attached values to represent weighted graphs:
81//!
82//! ```
83//! use graph_builder::prelude::*;
84//!
85//! let graph: UndirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
86//!     .csr_layout(CsrLayout::Sorted)
87//!     .edges_with_values(vec![(0, 1, 0.5), (0, 2, 0.7), (1, 2, 0.25), (1, 3, 1.0), (2, 3, 0.33)])
88//!     .build();
89//!
90//! assert_eq!(graph.node_count(), 4);
91//! assert_eq!(graph.edge_count(), 5);
92//!
93//! assert_eq!(graph.degree(1), 3);
94//!
95//! assert_eq!(
96//!     graph.neighbors_with_values(1).as_slice(),
97//!     &[Target::new(0, 0.5), Target::new(2, 0.25), Target::new(3, 1.0)]
98//! );
99//! ```
100//!
101//! It is also possible to create a graph from a specific input format. In the
102//! following example we use the `EdgeListInput` which is an input format where
103//! each line of a file contains an edge of the graph.
104//!
105//! ```
106//! use std::path::PathBuf;
107//!
108//! use graph_builder::prelude::*;
109//!
110//! let path = [env!("CARGO_MANIFEST_DIR"), "resources", "example.el"]
111//!     .iter()
112//!     .collect::<PathBuf>();
113//!
114//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
115//!     .csr_layout(CsrLayout::Sorted)
116//!     .file_format(EdgeListInput::default())
117//!     .path(path)
118//!     .build()
119//!     .expect("loading failed");
120//!
121//! assert_eq!(graph.node_count(), 4);
122//! assert_eq!(graph.edge_count(), 5);
123//!
124//! assert_eq!(graph.out_degree(1), 2);
125//! assert_eq!(graph.in_degree(1), 1);
126//!
127//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
128//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
129//! ```
130//!
131//! The `EdgeListInput` format also supports weighted edges. This can be
132//! controlled by a single type parameter on the graph type. Note, that the edge
133//! value type needs to implement [`crate::input::ParseValue`].
134//!
135//! ```
136//! use std::path::PathBuf;
137//!
138//! use graph_builder::prelude::*;
139//!
140//! let path = [env!("CARGO_MANIFEST_DIR"), "resources", "example.wel"]
141//!     .iter()
142//!     .collect::<PathBuf>();
143//!
144//! let graph: DirectedCsrGraph<usize, (), f32> = GraphBuilder::new()
145//!     .csr_layout(CsrLayout::Sorted)
146//!     .file_format(EdgeListInput::default())
147//!     .path(path)
148//!     .build()
149//!     .expect("loading failed");
150//!
151//! assert_eq!(graph.node_count(), 4);
152//! assert_eq!(graph.edge_count(), 5);
153//!
154//! assert_eq!(graph.out_degree(1), 2);
155//! assert_eq!(graph.in_degree(1), 1);
156//!
157//! assert_eq!(
158//!     graph.out_neighbors_with_values(1).as_slice(),
159//!     &[Target::new(2, 0.25), Target::new(3, 1.0)]
160//! );
161//! assert_eq!(
162//!     graph.in_neighbors_with_values(1).as_slice(),
163//!     &[Target::new(0, 0.5)]
164//! );
165//! ```
166//!
167//! # Types of graphs
168//!
169//! The crate currently ships with two graph implementations:
170//!
171//! ## Compressed Sparse Row (CSR)
172//!
173//! [CSR](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format))
174//! is a data structure used for representing a sparse matrix. Since graphs can be modelled as adjacency
175//! matrix and are typically very sparse, i.e., not all possible pairs of nodes are connected
176//! by an edge, the CSR representation is very well suited for representing a real-world graph topology.
177//!
178//! In our current implementation, we use two arrays to model the edges. One array stores the adjacency
179//! lists for all nodes consecutively which requires `O(edge_count)` space. The other array stores the
180//! offset for each node in the first array where the corresponding adjacency list can be found which
181//! requires `O(node_count)` space. The degree of a node can be inferred from the offset array.
182//!
183//! Our CSR implementation is immutable, i.e., once built, the topology of the graph cannot be altered as
184//! it would require inserting target ids and shifting all elements to the right which is expensive and
185//! invalidates all offsets coming afterwards. However, building the CSR data structure from a list of
186//! edges is implement very efficiently using multi-threading.
187//!
188//! However, due to inlining the all adjacency lists in one `Vec`, access becomes very cache-friendly,
189//! as there is a chance that the adjacency list of the next node is already cached. Also, reading the
190//! graph from multiple threads is safe, as there will be never be a concurrent mutable access.
191//!
192//! One can use [`DirectedCsrGraph`] or [`UndirectedCsrGraph`] to build a CSR-based graph:
193//!
194//! ```
195//! use graph_builder::prelude::*;
196//!
197//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
198//!     .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
199//!     .build();
200//!
201//! assert_eq!(graph.node_count(), 4);
202//! assert_eq!(graph.edge_count(), 5);
203//!
204//! assert_eq!(graph.out_degree(1), 2);
205//! assert_eq!(graph.in_degree(1), 1);
206//!
207//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
208//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
209//! ```
210//!
211//! ## Adjacency List (AL)
212//!
213//! In the Adjacency List implementation, we essentially store the graph as `Vec<Vec<ID>>`. The outer
214//! `Vec` has a length of `node_count` and at each index, we store the neighbors for that particular
215//! node in its own, heap-allocated `Vec`.
216//!
217//! The downside of that representation is that - compared to CSR - it is expected to be slower, both
218//! in building it and also in reading from it, as cache misses are becoming more likely due to the
219//! isolated heap allocations for individual neighbor lists.
220//!
221//! However, in contrast to CSR, an adjacency list is mutable, i.e., it is possible to add edges to the
222//! graph even after it has been built. This makes the data structure interesting for more flexible graph
223//! construction frameworks or for algorithms that need to add new edges as part of the computation.
224//! Currently, adding edges is constrained by source and target node already existing in the graph.
225//!
226//! Internally, the individual neighbor lists for each node are protected by a `Mutex` in order to support
227//! parallel read and write operations on the graph topology.
228//!
229//! One can use [`DirectedALGraph`] or [`UndirectedALGraph`] to build a Adjacency-List-based graph:
230//!
231//! ```
232//! use graph_builder::prelude::*;
233//!
234//! let graph: DirectedALGraph<usize> = GraphBuilder::new()
235//!     .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
236//!     .build();
237//!
238//! assert_eq!(graph.node_count(), 4);
239//! assert_eq!(graph.edge_count(), 5);
240//!
241//! assert_eq!(graph.out_degree(1), 2);
242//! assert_eq!(graph.in_degree(1), 1);
243//!
244//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
245//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
246//!
247//! // Let's mutate the graph by adding another edge
248//! graph.add_edge(1, 0);
249//! assert_eq!(graph.edge_count(), 6);
250//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3, 0]);
251//! ```
252
253pub mod builder;
254mod compat;
255pub mod graph;
256pub mod graph_ops;
257pub mod index;
258pub mod input;
259pub mod prelude;
260
261pub use crate::builder::GraphBuilder;
262pub use crate::graph::adj_list::DirectedALGraph;
263pub use crate::graph::adj_list::UndirectedALGraph;
264pub use crate::graph::csr::CsrLayout;
265pub use crate::graph::csr::DirectedCsrGraph;
266pub use crate::graph::csr::UndirectedCsrGraph;
267
268use std::convert::Infallible;
269
270use crate::graph::Target;
271use crate::index::Idx;
272use thiserror::Error;
273
274#[derive(Error, Debug)]
275pub enum Error {
276    #[error("error while loading graph")]
277    IoError {
278        #[from]
279        source: std::io::Error,
280    },
281    #[error("incompatible index type")]
282    IdxError {
283        #[from]
284        source: std::num::TryFromIntError,
285    },
286    #[cfg(feature = "gdl")]
287    #[cfg_attr(all(feature = "gdl", has_doc_cfg), doc(cfg(feature = "gdl")))]
288    #[error("error while parsing GDL input")]
289    GdlError {
290        #[from]
291        source: gdl::graph::GraphHandlerError,
292    },
293    #[error("invalid partitioning")]
294    InvalidPartitioning,
295    #[error("number of node values must be the same as node count")]
296    InvalidNodeValues,
297    #[error("invalid id size, expected {expected:?} bytes, got {actual:?} bytes")]
298    InvalidIdType { expected: String, actual: String },
299
300    #[error("node {node:?} does not exist in the graph")]
301    MissingNode { node: String },
302}
303
304impl From<Infallible> for Error {
305    fn from(_: Infallible) -> Self {
306        unreachable!()
307    }
308}
309
310/// A graph is a tuple `(N, E)`, where `N` is a set of nodes and `E` a set of
311/// edges. Each edge connects exactly two nodes.
312///
313/// `Graph` is parameterized over the node index type `Node` which is used to
314/// uniquely identify a node. An edge is a tuple of node identifiers.
315pub trait Graph<NI: Idx> {
316    /// Returns the number of nodes in the graph.
317    fn node_count(&self) -> NI;
318
319    /// Returns the number of edges in the graph.
320    fn edge_count(&self) -> NI;
321}
322
323/// A graph that allows storing a value per node.
324pub trait NodeValues<NI: Idx, NV> {
325    fn node_value(&self, node: NI) -> &NV;
326}
327
328pub trait UndirectedDegrees<NI: Idx> {
329    /// Returns the number of edges connected to the given node.
330    fn degree(&self, node: NI) -> NI;
331}
332
333/// Returns the neighbors of a given node.
334///
335/// The edge `(42, 1337)` is equivalent to the edge `(1337, 42)`.
336pub trait UndirectedNeighbors<NI: Idx> {
337    type NeighborsIterator<'a>: Iterator<Item = &'a NI>
338    where
339        Self: 'a;
340
341    /// Returns an iterator of all nodes connected to the given node.
342    fn neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
343}
344
345/// Returns the neighbors of a given node.
346///
347/// The edge `(42, 1337)` is equivalent to the edge `(1337, 42)`.
348pub trait UndirectedNeighborsWithValues<NI: Idx, EV> {
349    type NeighborsIterator<'a>: Iterator<Item = &'a Target<NI, EV>>
350    where
351        Self: 'a,
352        EV: 'a;
353
354    /// Returns an iterator of all nodes connected to the given node
355    /// including the value of the connecting edge.
356    fn neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
357}
358
359pub trait DirectedDegrees<NI: Idx> {
360    /// Returns the number of edges where the given node is a source node.
361    fn out_degree(&self, node: NI) -> NI;
362
363    /// Returns the number of edges where the given node is a target node.
364    fn in_degree(&self, node: NI) -> NI;
365}
366
367/// Returns the neighbors of a given node either in outgoing or incoming direction.
368///
369/// An edge tuple `e = (u, v)` has a source node `u` and a target node `v`. From
370/// the perspective of `u`, the edge `e` is an **outgoing** edge. From the
371/// perspective of node `v`, the edge `e` is an **incoming** edge. The edges
372/// `(u, v)` and `(v, u)` are not considered equivalent.
373pub trait DirectedNeighbors<NI: Idx> {
374    type NeighborsIterator<'a>: Iterator<Item = &'a NI>
375    where
376        Self: 'a;
377
378    /// Returns an iterator of all nodes which are connected in outgoing direction
379    /// to the given node, i.e., the given node is the source node of the
380    /// connecting edge.
381    fn out_neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
382
383    /// Returns an iterator of all nodes which are connected in incoming direction
384    /// to the given node, i.e., the given node is the target node of the
385    /// connecting edge.
386    fn in_neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
387}
388
389/// Returns the neighbors of a given node either in outgoing or incoming direction.
390///
391/// An edge tuple `e = (u, v)` has a source node `u` and a target node `v`. From
392/// the perspective of `u`, the edge `e` is an **outgoing** edge. From the
393/// perspective of node `v`, the edge `e` is an **incoming** edge. The edges
394/// `(u, v)` and `(v, u)` are not considered equivale
395pub trait DirectedNeighborsWithValues<NI: Idx, EV> {
396    type NeighborsIterator<'a>: Iterator<Item = &'a Target<NI, EV>>
397    where
398        Self: 'a,
399        EV: 'a;
400
401    /// Returns an iterator of all nodes which are connected in outgoing direction
402    /// to the given node, i.e., the given node is the source node of the
403    /// connecting edge. For each connected node, the value of the connecting
404    /// edge is also returned.
405    fn out_neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
406
407    /// Returns an iterator of all nodes which are connected in incoming direction
408    /// to the given node, i.e., the given node is the target node of the
409    /// connecting edge. For each connected node, the value of the connecting
410    /// edge is also returned.
411    fn in_neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
412}
413
414/// Allows adding new edges to a graph.
415pub trait EdgeMutation<NI: Idx> {
416    /// Adds a new edge between the given source and target node.
417    ///
418    /// # Errors
419    ///
420    /// If either the source or the target node does not exist,
421    /// the method will return [`Error::MissingNode`].
422    fn add_edge(&self, source: NI, target: NI) -> Result<(), Error>;
423
424    /// Adds a new edge between the given source and target node.
425    ///
426    /// Does not require locking the node-local list due to `&mut self`.
427    ///
428    /// # Errors
429    ///
430    /// If either the source or the target node does not exist,
431    /// the method will return [`Error::MissingNode`].
432    fn add_edge_mut(&mut self, source: NI, target: NI) -> Result<(), Error>;
433}
434
435/// Allows adding new edges to a graph.
436pub trait EdgeMutationWithValues<NI: Idx, EV> {
437    /// Adds a new edge between the given source and target node
438    /// and assigns the given value to it.
439    ///
440    /// # Errors
441    ///
442    /// If either the source or the target node does not exist,
443    /// the method will return [`Error::MissingNode`].
444    fn add_edge_with_value(&self, source: NI, target: NI, value: EV) -> Result<(), Error>;
445
446    /// Adds a new edge between the given source and target node
447    /// and assigns the given value to it.
448    ///
449    /// Does not require locking the node-local list due to `&mut self`.
450    ///
451    /// # Errors
452    ///
453    /// If either the source or the target node does not exist,
454    /// the method will return [`Error::MissingNode`].
455    fn add_edge_with_value_mut(&mut self, source: NI, target: NI, value: EV) -> Result<(), Error>;
456}
457
458#[repr(transparent)]
459pub struct SharedMut<T>(*mut T);
460unsafe impl<T: Send> Send for SharedMut<T> {}
461unsafe impl<T: Sync> Sync for SharedMut<T> {}
462
463impl<T> SharedMut<T> {
464    pub fn new(ptr: *mut T) -> Self {
465        SharedMut(ptr)
466    }
467
468    delegate::delegate! {
469        to self.0 {
470            /// # Safety
471            ///
472            /// Ensure that `count` does not exceed the capacity of the Vec.
473            pub unsafe fn add(&self, count: usize) -> *mut T;
474        }
475    }
476}