1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
#![cfg_attr(has_maybe_uninit_write_slice, feature(maybe_uninit_write_slice))]
#![cfg_attr(has_new_uninit, feature(new_uninit))]
#![cfg_attr(has_doc_cfg, feature(doc_cfg))]
#![cfg_attr(has_slice_partition_dedup, feature(slice_partition_dedup))]
//! A library that can be used as a building block for high-performant graph
//! algorithms.
//!
//! Graph provides implementations for directed and undirected graphs. Graphs
//! can be created programatically or read from custom input formats in a
//! type-safe way. The library uses [rayon](https://github.com/rayon-rs/rayon)
//! to parallelize all steps during graph creation.
//!
//! The implementation uses a Compressed-Sparse-Row (CSR) data structure which
//! is tailored for fast and concurrent access to the graph topology.
//!
//! **Note**: The development is mainly driven by
//! [Neo4j](https://github.com/neo4j/neo4j) developers. However, the library is
//! __not__ an official product of Neo4j.
//!
//! # What is a graph?
//!
//! A graph consists of nodes and edges where edges connect exactly two nodes. A
//! graph can be either directed, i.e., an edge has a source and a target node
//! or undirected where there is no such distinction.
//!
//! In a directed graph, each node `u` has outgoing and incoming neighbors. An
//! outgoing neighbor of node `u` is any node `v` for which an edge `(u, v)`
//! exists. An incoming neighbor of node `u` is any node `v` for which an edge
//! `(v, u)` exists.
//!
//! In an undirected graph there is no distinction between source and target
//! node. A neighbor of node `u` is any node `v` for which either an edge `(u,
//! v)` or `(v, u)` exists.
//!
//! # How to build a graph
//!
//! The library provides a builder that can be used to construct a graph from a
//! given list of edges.
//!
//! For example, to create a directed graph that uses `usize` as node
//! identifier, one can use the builder like so:
//!
//! ```
//! use graph_builder::prelude::*;
//!
//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
//! .build();
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.out_degree(1), 2);
//! assert_eq!(graph.in_degree(1), 1);
//!
//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
//! ```
//!
//! To build an undirected graph using `u32` as node identifer, we only need to
//! change the expected types:
//!
//! ```
//! use graph_builder::prelude::*;
//!
//! let graph: UndirectedCsrGraph<u32> = GraphBuilder::new()
//! .csr_layout(CsrLayout::Sorted)
//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
//! .build();
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.degree(1), 3);
//!
//! assert_eq!(graph.neighbors(1).as_slice(), &[0, 2, 3]);
//! ```
//!
//! Edges can have attached values to represent weighted graphs:
//!
//! ```
//! use graph_builder::prelude::*;
//!
//! let graph: UndirectedCsrGraph<u32, (), f32> = GraphBuilder::new()
//! .csr_layout(CsrLayout::Sorted)
//! .edges_with_values(vec![(0, 1, 0.5), (0, 2, 0.7), (1, 2, 0.25), (1, 3, 1.0), (2, 3, 0.33)])
//! .build();
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.degree(1), 3);
//!
//! assert_eq!(
//! graph.neighbors_with_values(1).as_slice(),
//! &[Target::new(0, 0.5), Target::new(2, 0.25), Target::new(3, 1.0)]
//! );
//! ```
//!
//! It is also possible to create a graph from a specific input format. In the
//! following example we use the `EdgeListInput` which is an input format where
//! each line of a file contains an edge of the graph.
//!
//! ```
//! use std::path::PathBuf;
//!
//! use graph_builder::prelude::*;
//!
//! let path = [env!("CARGO_MANIFEST_DIR"), "resources", "example.el"]
//! .iter()
//! .collect::<PathBuf>();
//!
//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
//! .csr_layout(CsrLayout::Sorted)
//! .file_format(EdgeListInput::default())
//! .path(path)
//! .build()
//! .expect("loading failed");
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.out_degree(1), 2);
//! assert_eq!(graph.in_degree(1), 1);
//!
//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
//! ```
//!
//! The `EdgeListInput` format also supports weighted edges. This can be
//! controlled by a single type parameter on the graph type. Note, that the edge
//! value type needs to implement [`crate::input::ParseValue`].
//!
//! ```
//! use std::path::PathBuf;
//!
//! use graph_builder::prelude::*;
//!
//! let path = [env!("CARGO_MANIFEST_DIR"), "resources", "example.wel"]
//! .iter()
//! .collect::<PathBuf>();
//!
//! let graph: DirectedCsrGraph<usize, (), f32> = GraphBuilder::new()
//! .csr_layout(CsrLayout::Sorted)
//! .file_format(EdgeListInput::default())
//! .path(path)
//! .build()
//! .expect("loading failed");
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.out_degree(1), 2);
//! assert_eq!(graph.in_degree(1), 1);
//!
//! assert_eq!(
//! graph.out_neighbors_with_values(1).as_slice(),
//! &[Target::new(2, 0.25), Target::new(3, 1.0)]
//! );
//! assert_eq!(
//! graph.in_neighbors_with_values(1).as_slice(),
//! &[Target::new(0, 0.5)]
//! );
//! ```
//!
//! # Types of graphs
//!
//! The crate currently ships with two graph implementations:
//!
//! ## Compressed Sparse Row (CSR)
//!
//! [CSR](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format))
//! is a data structure used for representing a sparse matrix. Since graphs can be modelled as adjacency
//! matrix and are typically very sparse, i.e., not all possible pairs of nodes are connected
//! by an edge, the CSR representation is very well suited for representing a real-world graph topology.
//!
//! In our current implementation, we use two arrays two model the edges. One array stores the adjacency
//! lists for all nodes consecutively which requires `O(edge_count)` space. The other array stores the
//! offset for each node in the first array where the corresponding adjacency list can be found which
//! requires `O(node_count)` space. The degree of a node can be inferred from the offset array.
//!
//! Our CSR implementation is immutable, i.e., once built, the topology of the graph cannot be altered as
//! it would require inserting target ids and shifting all elements to the right which is expensive and
//! invalidates all offsets coming afterwards. However, building the CSR data structure from a list of
//! edges is implement very efficiently using multi-threading.
//!
//! However, due to inlining the all adjacency lists in one `Vec`, access becomes very cache-friendly,
//! as there is a chance that the adjacency list of the next node is already cached. Also, reading the
//! graph from multiple threads is safe, as there will be never be a concurrent mutable access.
//!
//! One can use [`DirectedCsrGraph`] or [`UndirectedCsrGraph`] to build a CSR-based graph:
//!
//! ```
//! use graph_builder::prelude::*;
//!
//! let graph: DirectedCsrGraph<usize> = GraphBuilder::new()
//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
//! .build();
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.out_degree(1), 2);
//! assert_eq!(graph.in_degree(1), 1);
//!
//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
//! ```
//!
//! ## Adjacency List (AL)
//!
//! In the Adjacency List implementation, we essentially store the graph as `Vec<Vec<ID>>`. The outer
//! `Vec` has a length of `node_count` and at each index, we store the neighbors for that particular
//! node in its own, heap-allocated `Vec`.
//!
//! The downside of that representation is that - compared to CSR - it is expected to be slower, both
//! in building it and also in reading from it, as cache misses are becoming more likely due to the
//! isolated heap allocations for individual neighbor lists.
//!
//! However, in contrast to CSR, an adjacency list is mutable, i.e., it is possible to add edges to the
//! graph even after it has been built. This makes the data structure interesting for more flexible graph
//! construction frameworks or for algorithms that need to add new edges as part of the computation.
//! Currently, adding edges is constrained by source and target node already existing in the graph.
//!
//! Internally, the individual neighbor lists for each node are protected by a `Mutex` in order to support
//! parallel read and write operations on the graph topology.
//!
//! One can use [`DirectedALGraph`] or [`UndirectedALGraph`] to build a Adjacency-List-based graph:
//!
//! ```
//! use graph_builder::prelude::*;
//!
//! let graph: DirectedALGraph<usize> = GraphBuilder::new()
//! .edges(vec![(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)])
//! .build();
//!
//! assert_eq!(graph.node_count(), 4);
//! assert_eq!(graph.edge_count(), 5);
//!
//! assert_eq!(graph.out_degree(1), 2);
//! assert_eq!(graph.in_degree(1), 1);
//!
//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3]);
//! assert_eq!(graph.in_neighbors(1).as_slice(), &[0]);
//!
//! // Let's mutate the graph by adding another edge
//! graph.add_edge(1, 0);
//! assert_eq!(graph.edge_count(), 6);
//! assert_eq!(graph.out_neighbors(1).as_slice(), &[2, 3, 0]);
//! ```
pub mod builder;
mod compat;
pub mod graph;
pub mod graph_ops;
pub mod index;
pub mod input;
pub mod prelude;
pub use crate::builder::GraphBuilder;
pub use crate::graph::adj_list::DirectedALGraph;
pub use crate::graph::adj_list::UndirectedALGraph;
pub use crate::graph::csr::CsrLayout;
pub use crate::graph::csr::DirectedCsrGraph;
pub use crate::graph::csr::UndirectedCsrGraph;
use std::convert::Infallible;
use crate::graph::Target;
use crate::index::Idx;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum Error {
#[error("error while loading graph")]
IoError {
#[from]
source: std::io::Error,
},
#[error("incompatible index type")]
IdxError {
#[from]
source: std::num::TryFromIntError,
},
#[cfg(feature = "gdl")]
#[cfg_attr(all(feature = "gdl", has_doc_cfg), doc(cfg(feature = "gdl")))]
#[error("error while parsing GDL input")]
GdlError {
#[from]
source: gdl::graph::GraphHandlerError,
},
#[error("invalid partitioning")]
InvalidPartitioning,
#[error("number of node values must be the same as node count")]
InvalidNodeValues,
#[error("invalid id size, expected {expected:?} bytes, got {actual:?} bytes")]
InvalidIdType { expected: String, actual: String },
#[error("node {node:?} does not exist in the graph")]
MissingNode { node: String },
}
impl From<Infallible> for Error {
fn from(_: Infallible) -> Self {
unreachable!()
}
}
/// A graph is a tuple `(N, E)`, where `N` is a set of nodes and `E` a set of
/// edges. Each edge connects exactly two nodes.
///
/// `Graph` is parameterized over the node index type `Node` which is used to
/// uniquely identify a node. An edge is a tuple of node identifiers.
pub trait Graph<NI: Idx> {
/// Returns the number of nodes in the graph.
fn node_count(&self) -> NI;
/// Returns the number of edges in the graph.
fn edge_count(&self) -> NI;
}
/// A graph that allows storing a value per node.
pub trait NodeValues<NI: Idx, NV> {
fn node_value(&self, node: NI) -> &NV;
}
pub trait UndirectedDegrees<NI: Idx> {
/// Returns the number of edges connected to the given node.
fn degree(&self, node: NI) -> NI;
}
/// Returns the neighbors of a given node.
///
/// The edge `(42, 1337)` is equivalent to the edge `(1337, 42)`.
pub trait UndirectedNeighbors<NI: Idx> {
type NeighborsIterator<'a>: Iterator<Item = &'a NI>
where
Self: 'a;
/// Returns an iterator of all nodes connected to the given node.
fn neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
}
/// Returns the neighbors of a given node.
///
/// The edge `(42, 1337)` is equivalent to the edge `(1337, 42)`.
pub trait UndirectedNeighborsWithValues<NI: Idx, EV> {
type NeighborsIterator<'a>: Iterator<Item = &'a Target<NI, EV>>
where
Self: 'a,
EV: 'a;
/// Returns an iterator of all nodes connected to the given node
/// including the value of the connecting edge.
fn neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
}
pub trait DirectedDegrees<NI: Idx> {
/// Returns the number of edges where the given node is a source node.
fn out_degree(&self, node: NI) -> NI;
/// Returns the number of edges where the given node is a target node.
fn in_degree(&self, node: NI) -> NI;
}
/// Returns the neighbors of a given node either in outgoing or incoming direction.
///
/// An edge tuple `e = (u, v)` has a source node `u` and a target node `v`. From
/// the perspective of `u`, the edge `e` is an **outgoing** edge. From the
/// perspective of node `v`, the edge `e` is an **incoming** edge. The edges
/// `(u, v)` and `(v, u)` are not considered equivalent.
pub trait DirectedNeighbors<NI: Idx> {
type NeighborsIterator<'a>: Iterator<Item = &'a NI>
where
Self: 'a;
/// Returns an iterator of all nodes which are connected in outgoing direction
/// to the given node, i.e., the given node is the source node of the
/// connecting edge.
fn out_neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
/// Returns an iterator of all nodes which are connected in incoming direction
/// to the given node, i.e., the given node is the target node of the
/// connecting edge.
fn in_neighbors(&self, node: NI) -> Self::NeighborsIterator<'_>;
}
/// Returns the neighbors of a given node either in outgoing or incoming direction.
///
/// An edge tuple `e = (u, v)` has a source node `u` and a target node `v`. From
/// the perspective of `u`, the edge `e` is an **outgoing** edge. From the
/// perspective of node `v`, the edge `e` is an **incoming** edge. The edges
/// `(u, v)` and `(v, u)` are not considered equivale
pub trait DirectedNeighborsWithValues<NI: Idx, EV> {
type NeighborsIterator<'a>: Iterator<Item = &'a Target<NI, EV>>
where
Self: 'a,
EV: 'a;
/// Returns an iterator of all nodes which are connected in outgoing direction
/// to the given node, i.e., the given node is the source node of the
/// connecting edge. For each connected node, the value of the connecting
/// edge is also returned.
fn out_neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
/// Returns an iterator of all nodes which are connected in incoming direction
/// to the given node, i.e., the given node is the target node of the
/// connecting edge. For each connected node, the value of the connecting
/// edge is also returned.
fn in_neighbors_with_values(&self, node: NI) -> Self::NeighborsIterator<'_>;
}
/// Allows adding new edges to a graph.
pub trait EdgeMutation<NI: Idx> {
/// Adds a new edge between the given source and target node.
///
/// # Errors
///
/// If either the source or the target node does not exist,
/// the method will return [`Error::MissingNode`].
fn add_edge(&self, source: NI, target: NI) -> Result<(), Error>;
}
/// Allows adding new edges to a graph.
pub trait EdgeMutationWithValues<NI: Idx, EV> {
/// Adds a new edge between the given source and target node
/// and assigns the given value to it.
///
/// # Errors
///
/// If either the source or the target node does not exist,
/// the method will return [`Error::MissingNode`].
fn add_edge_with_value(&self, source: NI, target: NI, value: EV) -> Result<(), Error>;
}
#[repr(transparent)]
pub struct SharedMut<T>(*mut T);
unsafe impl<T: Send> Send for SharedMut<T> {}
unsafe impl<T: Sync> Sync for SharedMut<T> {}
impl<T> SharedMut<T> {
pub fn new(ptr: *mut T) -> Self {
SharedMut(ptr)
}
delegate::delegate! {
to self.0 {
/// # Safety
///
/// Ensure that `count` does not exceed the capacity of the Vec.
pub unsafe fn add(&self, count: usize) -> *mut T;
}
}
}