graph_builder/lib.rs
1#![cfg_attr(has_maybe_uninit_write_slice, feature(maybe_uninit_write_slice))]
2#![cfg_attr(has_new_uninit, feature(new_uninit))]
3#![cfg_attr(has_doc_cfg, feature(doc_cfg))]
4#![cfg_attr(has_slice_partition_dedup, feature(slice_partition_dedup))]
5
6//! A library that can be used as a building block for high-performant graph
7//! algorithms.
8//!
9//! Graph provides implementations for directed and undirected graphs. Graphs
10//! can be created programatically or read from custom input formats in a
11//! type-safe way. The library uses [rayon](https://github.com/rayon-rs/rayon)
12//! to parallelize all steps during graph creation.
13//!
14//! The implementation uses a Compressed-Sparse-Row (CSR) data structure which
15//! is tailored for fast and concurrent access to the graph topology.
16//!
17//! **Note**: The development is mainly driven by
18//! [Neo4j](https://github.com/neo4j/neo4j) developers. However, the library is
19//! __not__ an official product of Neo4j.
20//!
21//! # What is a graph?
22//!
23//! A graph consists of nodes and edges where edges connect exactly two nodes. A
24//! graph can be either directed, i.e., an edge has a source and a target node
25//! or undirected where there is no such distinction.
26//!
27//! In a directed graph, each node `u` has outgoing and incoming neighbors. An
28//! outgoing neighbor of node `u` is any node `v` for which an edge `(u, v)`
29//! exists. An incoming neighbor of node `u` is any node `v` for which an edge
30//! `(v, u)` exists.
31//!
32//! In an undirected graph there is no distinction between source and target
33//! node. A neighbor of node `u` is any node `v` for which either an edge `(u,
34//! v)` or `(v, u)` exists.
35//!
36//! # How to build a graph
37//!
38//! The library provides a builder that can be used to construct a graph from a
39//! given list of edges.
40//!
41//! For example, to create a directed graph that uses `usize` as node
42//! identifier, one can use the builder like so:
43//!
44//! ```
45//! use graph_builder::prelude::*;
46//!
47//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
48//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
49//! .build();
50//!
51//! assert_eq!(graph.node_count(), 4);
52//! assert_eq!(graph.edge_count(), 5);
53//!
54//! assert_eq!(graph.out_degree(1), 2);
55//! assert_eq!(graph.in_degree(1), 1);
56//!
57//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
58//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
59//! ```
60//!
61//! To build an undirected graph using `u32` as node identifer, we only need to
62//! change the expected types:
63//!
64//! ```
65//! use graph_builder::prelude::*;
66//!
67//! let graph: UndirectedCsrGraph<u32> = GraphBuilder::new()
68//! .csr_layout(CsrLayout::Sorted)
69//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
70//! .build();
71//!
72//! assert_eq!(graph.node_count(), 4);
73//! assert_eq!(graph.edge_count(), 5);
74//!
75//! assert_eq!(graph.degree(1), 3);
76//!
77//! assert_eq!(graph.neighbors(1).as_slice(), &[0, 2, 3]);
78//! ```
79//!
80//! Edges can have attached values to represent weighted graphs:
81//!
82//! ```
83//! use graph_builder::prelude::*;
84//!
85//! let graph: UndirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
86//! .csr_layout(CsrLayout::Sorted)
87//! .edges_with_values(vec![(0, 1, 0.5), (0, 2, 0.7), (1, 2, 0.25), (1, 3, 1.0), (2, 3, 0.33)])
88//! .build();
89//!
90//! assert_eq!(graph.node_count(), 4);
91//! assert_eq!(graph.edge_count(), 5);
92//!
93//! assert_eq!(graph.degree(1), 3);
94//!
95//! assert_eq!(
96//! graph.neighbors_with_values(1).as_slice(),
97//! &[Target::new(0, 0.5), Target::new(2, 0.25), Target::new(3, 1.0)]
98//! );
99//! ```
100//!
101//! It is also possible to create a graph from a specific input format. In the
102//! following example we use the `EdgeListInput` which is an input format where
103//! each line of a file contains an edge of the graph.
104//!
105//! ```
106//! use std::path::PathBuf;
107//!
108//! use graph_builder::prelude::*;
109//!
110//! let path = [env!("CARGO_MANIFEST_DIR"), "resources", "example.el"]
111//! .iter()
112//! .collect::<PathBuf>();
113//!
114//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
115//! .csr_layout(CsrLayout::Sorted)
116//! .file_format(EdgeListInput::default())
117//! .path(path)
118//! .build()
119//! .expect("loading failed");
120//!
121//! assert_eq!(graph.node_count(), 4);
122//! assert_eq!(graph.edge_count(), 5);
123//!
124//! assert_eq!(graph.out_degree(1), 2);
125//! assert_eq!(graph.in_degree(1), 1);
126//!
127//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
128//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
129//! ```
130//!
131//! The `EdgeListInput` format also supports weighted edges. This can be
132//! controlled by a single type parameter on the graph type. Note, that the edge
133//! value type needs to implement [`crate::input::ParseValue`].
134//!
135//! ```
136//! use std::path::PathBuf;
137//!
138//! use graph_builder::prelude::*;
139//!
140//! let path = [env!("CARGO_MANIFEST_DIR"), "resources", "example.wel"]
141//! .iter()
142//! .collect::<PathBuf>();
143//!
144//! let graph: DirectedCsrGraph<usize, (), f32> = GraphBuilder::new()
145//! .csr_layout(CsrLayout::Sorted)
146//! .file_format(EdgeListInput::default())
147//! .path(path)
148//! .build()
149//! .expect("loading failed");
150//!
151//! assert_eq!(graph.node_count(), 4);
152//! assert_eq!(graph.edge_count(), 5);
153//!
154//! assert_eq!(graph.out_degree(1), 2);
155//! assert_eq!(graph.in_degree(1), 1);
156//!
157//! assert_eq!(
158//! graph.out_neighbors_with_values(1).as_slice(),
159//! &[Target::new(2, 0.25), Target::new(3, 1.0)]
160//! );
161//! assert_eq!(
162//! graph.in_neighbors_with_values(1).as_slice(),
163//! &[Target::new(0, 0.5)]
164//! );
165//! ```
166//!
167//! # Types of graphs
168//!
169//! The crate currently ships with two graph implementations:
170//!
171//! ## Compressed Sparse Row (CSR)
172//!
173//! [CSR](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format))
174//! is a data structure used for representing a sparse matrix. Since graphs can be modelled as adjacency
175//! matrix and are typically very sparse, i.e., not all possible pairs of nodes are connected
176//! by an edge, the CSR representation is very well suited for representing a real-world graph topology.
177//!
178//! In our current implementation, we use two arrays to model the edges. One array stores the adjacency
179//! lists for all nodes consecutively which requires `O(edge_count)` space. The other array stores the
180//! offset for each node in the first array where the corresponding adjacency list can be found which
181//! requires `O(node_count)` space. The degree of a node can be inferred from the offset array.
182//!
183//! Our CSR implementation is immutable, i.e., once built, the topology of the graph cannot be altered as
184//! it would require inserting target ids and shifting all elements to the right which is expensive and
185//! invalidates all offsets coming afterwards. However, building the CSR data structure from a list of
186//! edges is implement very efficiently using multi-threading.
187//!
188//! However, due to inlining the all adjacency lists in one `Vec`, access becomes very cache-friendly,
189//! as there is a chance that the adjacency list of the next node is already cached. Also, reading the
190//! graph from multiple threads is safe, as there will be never be a concurrent mutable access.
191//!
192//! One can use [`DirectedCsrGraph`] or [`UndirectedCsrGraph`] to build a CSR-based graph:
193//!
194//! ```
195//! use graph_builder::prelude::*;
196//!
197//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
198//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
199//! .build();
200//!
201//! assert_eq!(graph.node_count(), 4);
202//! assert_eq!(graph.edge_count(), 5);
203//!
204//! assert_eq!(graph.out_degree(1), 2);
205//! assert_eq!(graph.in_degree(1), 1);
206//!
207//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
208//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
209//! ```
210//!
211//! ## Adjacency List (AL)
212//!
213//! In the Adjacency List implementation, we essentially store the graph as `Vec<Vec<ID>>`. The outer
214//! `Vec` has a length of `node_count` and at each index, we store the neighbors for that particular
215//! node in its own, heap-allocated `Vec`.
216//!
217//! The downside of that representation is that - compared to CSR - it is expected to be slower, both
218//! in building it and also in reading from it, as cache misses are becoming more likely due to the
219//! isolated heap allocations for individual neighbor lists.
220//!
221//! However, in contrast to CSR, an adjacency list is mutable, i.e., it is possible to add edges to the
222//! graph even after it has been built. This makes the data structure interesting for more flexible graph
223//! construction frameworks or for algorithms that need to add new edges as part of the computation.
224//! Currently, adding edges is constrained by source and target node already existing in the graph.
225//!
226//! Internally, the individual neighbor lists for each node are protected by a `Mutex` in order to support
227//! parallel read and write operations on the graph topology.
228//!
229//! One can use [`DirectedALGraph`] or [`UndirectedALGraph`] to build a Adjacency-List-based graph:
230//!
231//! ```
232//! use graph_builder::prelude::*;
233//!
234//! let graph: DirectedALGraph<usize> = GraphBuilder::new()
235//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
236//! .build();
237//!
238//! assert_eq!(graph.node_count(), 4);
239//! assert_eq!(graph.edge_count(), 5);
240//!
241//! assert_eq!(graph.out_degree(1), 2);
242//! assert_eq!(graph.in_degree(1), 1);
243//!
244//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
245//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
246//!
247//! // Let's mutate the graph by adding another edge
248//! graph.add_edge(1, 0);
249//! assert_eq!(graph.edge_count(), 6);
250//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3, 0]);
251//! ```
252
253pub mod builder;
254mod compat;
255pub mod graph;
256pub mod graph_ops;
257pub mod index;
258pub mod input;
259pub mod prelude;
260
261pub use crate::builder::GraphBuilder;
262pub use crate::graph::adj_list::DirectedALGraph;
263pub use crate::graph::adj_list::UndirectedALGraph;
264pub use crate::graph::csr::CsrLayout;
265pub use crate::graph::csr::DirectedCsrGraph;
266pub use crate::graph::csr::UndirectedCsrGraph;
267
268use std::convert::Infallible;
269
270use crate::graph::Target;
271use crate::index::Idx;
272use thiserror::Error;
273
274#[derive(Error, Debug)]
275pub enum Error {
276 #[error("error while loading graph")]
277 IoError {
278 #[from]
279 source: std::io::Error,
280 },
281 #[error("incompatible index type")]
282 IdxError {
283 #[from]
284 source: std::num::TryFromIntError,
285 },
286 #[cfg(feature = "gdl")]
287 #[cfg_attr(all(feature = "gdl", has_doc_cfg), doc(cfg(feature = "gdl")))]
288 #[error("error while parsing GDL input")]
289 GdlError {
290 #[from]
291 source: gdl::graph::GraphHandlerError,
292 },
293 #[error("invalid partitioning")]
294 InvalidPartitioning,
295 #[error("number of node values must be the same as node count")]
296 InvalidNodeValues,
297 #[error("invalid id size, expected {expected:?} bytes, got {actual:?} bytes")]
298 InvalidIdType { expected: String, actual: String },
299
300 #[error("node {node:?} does not exist in the graph")]
301 MissingNode { node: String },
302}
303
304impl From<Infallible> for Error {
305 fn from(_: Infallible) -> Self {
306 unreachable!()
307 }
308}
309
310/// A graph is a tuple `(N, E)`, where `N` is a set of nodes and `E` a set of
311/// edges. Each edge connects exactly two nodes.
312///
313/// `Graph` is parameterized over the node index type `Node` which is used to
314/// uniquely identify a node. An edge is a tuple of node identifiers.
315pub trait Graph<NI: Idx> {
316 /// Returns the number of nodes in the graph.
317 fn node_count(&self) -> NI;
318
319 /// Returns the number of edges in the graph.
320 fn edge_count(&self) -> NI;
321}
322
323/// A graph that allows storing a value per node.
324pub trait NodeValues<NI: Idx, NV> {
325 fn node_value(&self, node: NI) -> &NV;
326}
327
328pub trait UndirectedDegrees<NI: Idx> {
329 /// Returns the number of edges connected to the given node.
330 fn degree(&self, node: NI) -> NI;
331}
332
333/// Returns the neighbors of a given node.
334///
335/// The edge `(42, 1337)` is equivalent to the edge `(1337, 42)`.
336pub trait UndirectedNeighbors<NI: Idx> {
337 type NeighborsIterator<'a>: Iterator<Item = &'a NI>
338 where
339 Self: 'a;
340
341 /// Returns an iterator of all nodes connected to the given node.
342 fn neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
343}
344
345/// Returns the neighbors of a given node.
346///
347/// The edge `(42, 1337)` is equivalent to the edge `(1337, 42)`.
348pub trait UndirectedNeighborsWithValues<NI: Idx, EV> {
349 type NeighborsIterator<'a>: Iterator<Item = &'a Target<NI, EV>>
350 where
351 Self: 'a,
352 EV: 'a;
353
354 /// Returns an iterator of all nodes connected to the given node
355 /// including the value of the connecting edge.
356 fn neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
357}
358
359pub trait DirectedDegrees<NI: Idx> {
360 /// Returns the number of edges where the given node is a source node.
361 fn out_degree(&self, node: NI) -> NI;
362
363 /// Returns the number of edges where the given node is a target node.
364 fn in_degree(&self, node: NI) -> NI;
365}
366
367/// Returns the neighbors of a given node either in outgoing or incoming direction.
368///
369/// An edge tuple `e = (u, v)` has a source node `u` and a target node `v`. From
370/// the perspective of `u`, the edge `e` is an **outgoing** edge. From the
371/// perspective of node `v`, the edge `e` is an **incoming** edge. The edges
372/// `(u, v)` and `(v, u)` are not considered equivalent.
373pub trait DirectedNeighbors<NI: Idx> {
374 type NeighborsIterator<'a>: Iterator<Item = &'a NI>
375 where
376 Self: 'a;
377
378 /// Returns an iterator of all nodes which are connected in outgoing direction
379 /// to the given node, i.e., the given node is the source node of the
380 /// connecting edge.
381 fn out_neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
382
383 /// Returns an iterator of all nodes which are connected in incoming direction
384 /// to the given node, i.e., the given node is the target node of the
385 /// connecting edge.
386 fn in_neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
387}
388
389/// Returns the neighbors of a given node either in outgoing or incoming direction.
390///
391/// An edge tuple `e = (u, v)` has a source node `u` and a target node `v`. From
392/// the perspective of `u`, the edge `e` is an **outgoing** edge. From the
393/// perspective of node `v`, the edge `e` is an **incoming** edge. The edges
394/// `(u, v)` and `(v, u)` are not considered equivale
395pub trait DirectedNeighborsWithValues<NI: Idx, EV> {
396 type NeighborsIterator<'a>: Iterator<Item = &'a Target<NI, EV>>
397 where
398 Self: 'a,
399 EV: 'a;
400
401 /// Returns an iterator of all nodes which are connected in outgoing direction
402 /// to the given node, i.e., the given node is the source node of the
403 /// connecting edge. For each connected node, the value of the connecting
404 /// edge is also returned.
405 fn out_neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
406
407 /// Returns an iterator of all nodes which are connected in incoming direction
408 /// to the given node, i.e., the given node is the target node of the
409 /// connecting edge. For each connected node, the value of the connecting
410 /// edge is also returned.
411 fn in_neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
412}
413
414/// Allows adding new edges to a graph.
415pub trait EdgeMutation<NI: Idx> {
416 /// Adds a new edge between the given source and target node.
417 ///
418 /// # Errors
419 ///
420 /// If either the source or the target node does not exist,
421 /// the method will return [`Error::MissingNode`].
422 fn add_edge(&self, source: NI, target: NI) -> Result<(), Error>;
423
424 /// Adds a new edge between the given source and target node.
425 ///
426 /// Does not require locking the node-local list due to `&mut self`.
427 ///
428 /// # Errors
429 ///
430 /// If either the source or the target node does not exist,
431 /// the method will return [`Error::MissingNode`].
432 fn add_edge_mut(&mut self, source: NI, target: NI) -> Result<(), Error>;
433}
434
435/// Allows adding new edges to a graph.
436pub trait EdgeMutationWithValues<NI: Idx, EV> {
437 /// Adds a new edge between the given source and target node
438 /// and assigns the given value to it.
439 ///
440 /// # Errors
441 ///
442 /// If either the source or the target node does not exist,
443 /// the method will return [`Error::MissingNode`].
444 fn add_edge_with_value(&self, source: NI, target: NI, value: EV) -> Result<(), Error>;
445
446 /// Adds a new edge between the given source and target node
447 /// and assigns the given value to it.
448 ///
449 /// Does not require locking the node-local list due to `&mut self`.
450 ///
451 /// # Errors
452 ///
453 /// If either the source or the target node does not exist,
454 /// the method will return [`Error::MissingNode`].
455 fn add_edge_with_value_mut(&mut self, source: NI, target: NI, value: EV) -> Result<(), Error>;
456}
457
458#[repr(transparent)]
459pub struct SharedMut<T>(*mut T);
460unsafe impl<T: Send> Send for SharedMut<T> {}
461unsafe impl<T: Sync> Sync for SharedMut<T> {}
462
463impl<T> SharedMut<T> {
464 pub fn new(ptr: *mut T) -> Self {
465 SharedMut(ptr)
466 }
467
468 delegate::delegate! {
469 to self.0 {
470 /// # Safety
471 ///
472 /// Ensure that `count` does not exceed the capacity of the Vec.
473 pub unsafe fn add(&self, count: usize) -> *mut T;
474 }
475 }
476}