pargraph/lib.rs
1// SPDX-FileCopyrightText: 2022 Thomas Kramer <code@tkramer.ch>
2//
3// SPDX-License-Identifier: GPL-3.0-or-later
4
5//! ParaGraph: Parallel graph processing.
6//!
7//! This crate implements datastructures and algorithms for concurrent traversal of graphs.
8//! Graphs can be processed using 'operators'. An operator can see only a small part of a graph, namely
9//! the 'active' node and its direct neighbors. Labelling operators can edit the associated data of the active
10//! node and they can generate a set of new nodes which should be processed later. The order how the nodes are processed
11//! is largely defined by the 'worklists'.
12//!
13//! # Operators
14//! There are the following types of operators:
15//! * [`ReadonlyOperator`] - Can only access graph elements by immutable reference. Needs to use interior mutability if modification is necessary.
16//! * [`LabellingOperator`] - Can modify the local node data. The executor must provide a mutable reference to local node data.
17//!
18//! # Executors
19//! There are the following executors:
20//! * [`executors::single_thread::SingleThreadExecutor`] - Executes an operator in the current thread.
21//! * [`executors::multi_thread::MultiThreadExecutor`] - Executes an operator on many threads. Imposes stricter trait bounds on operators and graph data structures.
22//!
23//!
24//! # Example: compute cone of influence using atomics
25//!
26//! The following example visits the output cone of the `src` node. The output cone consists of all nodes
27//! which can be reached by starting at `src` and then following outgoing edges.
28//! Additionally, for each node in the cone, the operator keeps track of input nodes which are in the cone.
29//!
30//! Similar algorithms can for example be used to mark the regions of interest for incremental updates for shortest path searches.
31//!
32//! This algorithm is implemented as a [`ReadonlyOperator`] which operates on immutable references of the node data.
33//! Safe mutability of the node data is still achieved using atomics. This avoids wrapping the node data into a
34//! [`DataCell`] for locking.
35//!
36//! ```
37//! use pargraph::prelude::*;
38//! use petgraph::data::DataMap;
39//! use petgraph::graph::DiGraph;
40//! use petgraph::visit::*;
41//! use std::sync::atomic::AtomicU32;
42//!
43//! struct NodeData {
44//! /// Count the number of input edges to the node
45//! /// which are part of the cone.
46//! num_dependencies: AtomicU32,
47//! }
48//!
49//! impl NodeData {
50//! fn new() -> Self {
51//! Self {
52//! num_dependencies: AtomicU32::new(0),
53//! }
54//! }
55//! }
56//!
57//! // Create a graph like:
58//! // x---
59//! // | \
60//! // src y
61//! // |
62//! // a
63//! // / \
64//! // b c
65//! // \ /
66//! // d
67//! let mut g = DiGraph::new();
68//!
69//! // Helper function for creating new nodes with default node data.
70//! // Initialize the distance to the maximum value.
71//! let mut new_node = || g.add_node(NodeData::new());
72//!
73//! // Create some new nodes.
74//! let [x, y, src, a, b, c, d] = [(); 7].map(|_| new_node());
75//!
76//! // Add some edges (without any weights).
77//! g.add_edge(x, src, ());
78//! g.add_edge(x, y, ());
79//! g.add_edge(src, a, ());
80//! g.add_edge(a, b, ());
81//! g.add_edge(a, c, ());
82//! g.add_edge(c, d, ());
83//! g.add_edge(b, d, ());
84//!
85//! let operator = ConeOfInfluenceOp {};
86//!
87//! let executor = MultiThreadExecutor::new();
88//!
89//! // Create a worklist and add the source node.
90//! let wl = FifoWorklist::new_with_local_queues(vec![src].into());
91//! executor.run_readonly(wl, &operator, &g);
92//!
93//! let get_num_dependencies = |n: petgraph::graph::NodeIndex| -> u32 {
94//! g.node_weight(n)
95//! .unwrap()
96//! .num_dependencies
97//! .load(std::sync::atomic::Ordering::Relaxed)
98//! };
99//!
100//! // Check the distances.
101//! assert_eq!(get_num_dependencies(x), 0, "x is not in the cone of influence of src");
102//! assert_eq!(get_num_dependencies(y), 0, "y is not in the cone of influence of src");
103//! assert_eq!(get_num_dependencies(src), 0);
104//! assert_eq!(get_num_dependencies(a), 1);
105//! assert_eq!(get_num_dependencies(b), 1);
106//! assert_eq!(get_num_dependencies(c), 1);
107//! assert_eq!(get_num_dependencies(d), 2);
108//!
109//! // This is our operator.
110//! struct ConeOfInfluenceOp {}
111//!
112//! // We can implement this operator as a `ReadonlyOperator` because it does not require
113//! // a mutable reference to the node data. Safe mutability is achieved using atomics.
114//! // Note that we implement the operator for the reference type. Operators are required to implement `Clone`.
115//! // A reference implements `Clone` automatically. Alternatively we could also derive `Clone` for `ConeOfInfluenceOp`
116//! // and pass ownership of the operator to the executor. The executor might create clones of the operators for the worker
117//! // threads.
118//! impl<G> ReadonlyOperator<G> for &ConeOfInfluenceOp
119//! where
120//! G: GraphBase + IntoEdgesDirected,
121//! G: DataMap<NodeWeight = NodeData>,
122//! {
123//! type WorkItem = G::NodeId;
124//!
125//! fn op(
126//! &self,
127//! active_node: Self::WorkItem,
128//! local_view: LocalGraphView<&G>,
129//! mut worklist: impl WorklistPush<Self::WorkItem>,
130//! ) {
131//! let output_nodes =
132//! local_view.neighbors_directed(active_node, petgraph::Direction::Outgoing);
133//!
134//! for n in output_nodes {
135//! // Access the node weight.
136//! let n_data = local_view
137//! .node_weight(n)
138//! .expect("all nodes should have a weight");
139//!
140//! // Atomically increment the number of dependencies of the node `n`.
141//! // `fetch_add` returns the previous value. If the previous value is `0` then
142//! // we know that this is the first time we look at node `n` (unless there is a cycle leading to the source node).
143//! let previous_num_dependencies = n_data
144//! .num_dependencies
145//! .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
146//!
147//! if previous_num_dependencies == 0 {
148//! // This is the first time n is touched.
149//! worklist.push(n);
150//! }
151//! }
152//! }
153//! }
154//! ```
155
156#![deny(missing_docs)]
157#![deny(unused_imports)]
158
159mod conflict_resolution;
160pub mod executors;
161pub mod id_rwlock;
162pub mod local_view;
163pub mod prelude;
164pub mod worklists;
165
166use std::{borrow::Borrow, fmt::Debug};
167
168use petgraph::visit::GraphBase;
169
170use self::{
171 id_rwlock::IdLock,
172 local_view::{FullConflictDetection, LocalGraphView},
173 worklists::WorklistPush,
174};
175
176/// An graph operator which can only modify labels of nodes and edges
177/// but cannot change the topology of a graph.
178pub trait LabellingOperator<G>
179where
180 G: GraphBase,
181{
182 /// Data type used for label of the nodes.
183 type NodeWeight;
184
185 /// Type used for representing a graph node.
186 ///
187 /// In order to allow including associated data, the work item can be any type which
188 /// allows to borrow a reference to the graph node ID.
189 /// Because `Borrow<T>` is by default also implemented for `T`, this allows to use graph node IDs
190 /// directly as a `WorkItem`.
191 type WorkItem: Borrow<G::NodeId>;
192
193 /// Do the graph operation on the given active node.
194 ///
195 /// `local_data` provides exclusive mutable access to the node data of the active node.
196 /// The operator can request read access to its neighborhood using `local_view.try_node_weight(n)`.
197 /// Nodes which should become active nodes later can be pushed to the worklist with
198 /// the `worklist.push()` function.
199 /// In order for data conflicts to be handled, the errors during locking of node or edge data must be propagated to the caller.
200 /// Modifications to the `local_data` are *not* reverted upon a data conflict.
201 fn op(
202 &self,
203 work_item: Self::WorkItem,
204 local_view: LocalGraphView<&G, FullConflictDetection>,
205 local_data: &mut Self::NodeWeight,
206 worklist: impl WorklistPush<Self::WorkItem>,
207 ) -> Result<(), DataConflictErr<G::NodeId, G::EdgeId>>;
208}
209
210/// Operator which only gets read access to the graph.
211/// The implementation can still write to graph/edge weights
212/// but needs to care about synchronization itself by using apropriate
213/// schemes (for example `RwLock` and atomics).
214pub trait ReadonlyOperator<G>
215where
216 G: GraphBase,
217{
218 /// Type used for representing a graph node.
219 ///
220 /// In order to allow including associated data, the work item can be any type which
221 /// allows to borrow a reference to the graph node ID.
222 /// Because `Borrow<T>` is by default also implemented for `T`, this allows to use graph node IDs
223 /// directly as a `WorkItem`.
224 type WorkItem: Borrow<G::NodeId>;
225
226 /// Do the graph operation on the given active node.
227 fn op(
228 &self,
229 work_item: Self::WorkItem,
230 local_view: LocalGraphView<&G>,
231 worklist: impl WorklistPush<Self::WorkItem>,
232 );
233}
234
235/// A data conflict error which can happen in two ways:
236/// 1) The executor tries to get write access to the data of the active node but there's already some other threads reading or writing to it.
237/// 2) The operator tries to get read access to data of other nodes/edges but the data is already locked for write access.
238#[derive(Debug, Copy, Clone)]
239pub struct DataConflictErr<Node, Edge> {
240 /// The graph element which was tried to be accessed. TODO: This might not be used.
241 _graph_element: GraphElement<Node, Edge>,
242 /// The error caused by trying to acquire the lock.
243 err: LockingErr,
244}
245
246/// Either a node or edge ID.
247#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
248enum GraphElement<N, E> {
249 NodeId(N),
250 EdgeId(E),
251}
252
253/// Bundle read/write locking errors into one type.
254#[derive(Debug, Copy, Clone)]
255enum LockingErr {
256 /// Failed to acquire a read lock.
257 ReadErr(id_rwlock::IdLockReadErr),
258 /// Failed to acquire a write lock.
259 WriteErr(id_rwlock::IdLockWriteErr),
260}
261
262impl From<id_rwlock::IdLockReadErr> for LockingErr {
263 fn from(e: id_rwlock::IdLockReadErr) -> Self {
264 Self::ReadErr(e)
265 }
266}
267
268impl From<id_rwlock::IdLockWriteErr> for LockingErr {
269 fn from(e: id_rwlock::IdLockWriteErr) -> Self {
270 Self::WriteErr(e)
271 }
272}
273
274/// Wrapper for data associated with graph elements.
275/// Used for interior mutability during parallel graph processing.
276pub type DataCell<T> = IdLock<T>;
277
278/// This trait is used to allow using arbitrary data types for node weights
279/// as long as it is possible to use a `DataCell` from them.
280pub trait BorrowDataCell {
281 /// Data type which is wrapped to get synchronized access.
282 type UserData;
283
284 /// Get a reference to a [`DataCell`] structure.
285 fn borrow_data_cell(&self) -> &DataCell<Self::UserData>;
286}
287
288impl<T> BorrowDataCell for DataCell<T> {
289 type UserData = T;
290
291 fn borrow_data_cell(&self) -> &DataCell<Self::UserData> {
292 self
293 }
294}
295
296// /// Bundle of a graph node and a priority.
297// /// This type implements `PartialOrd` and `Ord` based on the value of the priority (the value of the node is ignored).
298// /// Priorities can be disabled by using the `()` type for priorities.
299// /// Then the memory representation of this struct is equal to the one of the type used for graph nodes.
300// #[derive(Clone, Copy, Debug, Eq, PartialEq)]
301// pub struct WithPriority<N, P> {
302// /// Priority associated with the graph node. Can be of type `()` to disable priorities.
303// pub priority: P,
304// /// ID of the graph node.
305// pub node: N,
306// }
307//
308// impl<N> From<N> for WithPriority<N, ()> {
309// fn from(node: N) -> WithPriority<N, ()> {
310// WithPriority::new(node)
311// }
312// }
313//
314// impl<N, P> From<(N, P)> for WithPriority<N, P> {
315// fn from((node, priority): (N, P)) -> Self {
316// Self::new_with_priority(node, priority)
317// }
318// }
319//
320// impl<N, P> WithPriority<N, P> {
321// pub fn new_with_priority(node: N, priority: P) -> Self {
322// Self { priority, node }
323// }
324// }
325//
326// impl<N> WithPriority<N, ()> {
327// /// Create a new struct with priorities disabled.
328// pub fn new(node: N) -> Self {
329// Self { priority: (), node }
330// }
331// }
332//
333// impl<N, P> PartialOrd for WithPriority<N, P>
334// where
335// P: PartialOrd,
336// N: PartialEq,
337// {
338// fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
339// self.priority.partial_cmp(&other.priority)
340// }
341// }
342//
343// impl<N, P> Ord for WithPriority<N, P>
344// where
345// P: Ord,
346// N: PartialEq + Eq,
347// {
348// fn cmp(&self, other: &Self) -> std::cmp::Ordering {
349// self.priority.cmp(&other.priority)
350// }
351// }