query_graph/
lib.rs

1use std::{
2    cell::RefCell,
3    fmt::Debug,
4    hash::Hash,
5    sync::{Arc, OnceLock},
6};
7
8use hashbrown::HashSet;
9use map::ConcurrentMap;
10use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
11
12mod map;
13
14/// The `Graph` struct represents a concurrent query dependency graph. It provides
15/// the infrastructure for managing, resolving, and optimizing a wide range of
16/// queries across a variety of applications, including but not limited to
17/// concurrent incremental compilers.
18///
19/// # Overview
20///
21/// A `Graph` instance serves as the central data structure for tracking and managing
22/// dependencies between queries (`Q`) and their respective results (`R`). This data
23/// structure is completely agnostic to the specific use case, and its generic nature
24/// makes it adaptable to a multitude of scenarios beyond compiler optimization.
25///
26/// # Query Resolution
27///
28/// The `Graph` type allows for concurrent query resolution, where queries can be
29/// resolved in parallel, enhancing performance significantly. The `increment` method
30/// does not block and returns a new iteration of the graph immediately. In fact,
31/// the `Graph.query` method is designed to block as little as possible. In the
32/// general case it will block extremely infrequently. This means that queries
33/// will be able to continue chugging away as fast as possible with little to no
34/// interruptions.
35///
36/// # Structure
37///
38/// - `new`: A `QueryNodeMap` representing the current state of queries for the
39///   current iteration. All new queries and their results are stored in this map.
40///   It begins each iteration as an empty map.
41///
42/// - `old`: A `QueryNodeMap` serving as a reference to the map from the previous
43///   iteration. It is used for validating queries and their results from the
44///   current iteration. This reference mechanism provides an efficient way to
45///   track and compare query changes across iterations.
46///
47/// - `resolver`: An associated type (`ResolveQuery`) used to resolve queries
48///   and obtain their results. This type may carry its own state as long as it
49///   implements the `Sync` and `Send` traits, enabling it to work seamlessly in a
50///   multithreaded environment.
51///
52/// # Incremental Compilation
53///
54/// In the context of a concurrent incremental compiler, only queries that are
55/// determined to be outdated are resolved again. This approach greatly reduces the
56/// time required to resolve queries, making the example compiler incremental.
57/// The semantic model is still built every time you mutate the compiler's state,
58/// but most of the work is already done and can be reused, resulting in extremely
59/// fast model building.
60///
61/// # Usage
62///
63/// The `Graph` type can be employed in a wide range of applications, from compilers
64/// to data processing pipelines and more. Its flexibility allows developers to adapt
65/// it to their specific use cases.
66///
67/// # Considerations
68///
69/// - The `Graph` is a versatile data structure that can be applied in various
70///   scenarios beyond compilers. Its flexibility allows developers to adapt it to
71///   their specific use cases.
72///
73/// - The resolver associated with the `Graph` should be chosen based on the
74///   requirements of the application and its thread safety characteristics.
75pub struct Graph<Q, R> {
76    /// The new map is used for all the queries in this iteration.
77    /// This map always starts empty.
78    new: QueryNodeMap<Q, R>,
79    /// The old map is used for validating queries from this iteration.
80    /// It's just a reference to the map from the previous iteration and
81    /// so is very efficient.
82    old: QueryNodeMap<Q, R>,
83    /// The resolver used to resolve queries. The resolver can have its
84    /// own state as long as it's Sync + Send.
85    resolver: Box<dyn ResolveQuery<Q, R>>,
86}
87
88#[derive(Debug)]
89struct Node<Q, R> {
90    result: R,
91    changed: bool,
92    edges_from: Arc<HashSet<Q>>,
93}
94
95type QueryNodeMap<Q, R> = Arc<ConcurrentMap<Q, Arc<OnceLock<Node<Q, R>>>>>;
96
97impl<Q: Debug + Clone + Eq + Hash, R: Debug + Clone> Debug for Graph<Q, R> {
98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99        f.debug_struct("Graph")
100            .field("new", &self.new)
101            .field("old", &self.old)
102            .finish()
103    }
104}
105
106impl<Q: Clone + Eq + Hash + Send + Sync, R: Clone + Eq + Send + Sync> Graph<Q, R> {
107    pub fn new(resolver: impl ResolveQuery<Q, R> + 'static) -> Arc<Self> {
108        Arc::new(Self {
109            new: Arc::new(ConcurrentMap::new()),
110            old: Arc::new(ConcurrentMap::new()),
111            resolver: Box::new(resolver),
112        })
113    }
114
115    pub fn query(self: &Arc<Self>, q: Q) -> R {
116        let node = self.get_node(&q);
117        let node = node.get_or_init(|| self.resolve(q));
118        node.result.clone()
119    }
120
121    fn get_node(self: &Arc<Self>, q: &Q) -> Arc<OnceLock<Node<Q, R>>> {
122        self.new
123            .get_or_insert(q.clone(), || Arc::new(OnceLock::default()))
124    }
125
126    fn resolve(self: &Arc<Self>, q: Q) -> Node<Q, R> {
127        if let Some(old) = self.old.get(&q) {
128            // Since there was an old node we have to validate it.
129            let old_node = old.get();
130
131            if let Some(old_node) = old_node {
132                if old_node.edges_from.len() == 0 {
133                    // Since the node had no dependencies (a root node) we must
134                    // resolve it again to see if it changed.
135                    let resolver = Arc::new(QueryResolver::new(self.clone()));
136                    let result = self.resolver.resolve(q, resolver.clone());
137
138                    Node {
139                        // This is very important and crucial to the whole system
140                        // working. If the result is the same as the old result then
141                        // changed must be false. This prevents nodes from needlessly
142                        // being resolved again when their old values can be used
143                        // instead.
144                        changed: result != old_node.result,
145                        result,
146                        edges_from: Arc::new(resolver.edges_from.take()),
147                    }
148                } else {
149                    let any_changed = old_node.edges_from.par_iter().any(|parent| {
150                        let node = self.get_node(parent);
151                        let node = node.get_or_init(|| self.resolve(parent.clone()));
152
153                        node.changed
154                    });
155
156                    if any_changed {
157                        // Since at least one dependency of this query has changed
158                        // we have to resolve this query again.
159                        let resolver = Arc::new(QueryResolver::new(self.clone()));
160                        let result = self.resolver.resolve(q, resolver.clone());
161
162                        Node {
163                            // This is very important and crucial to the whole system
164                            // working. If the result is the same as the old result then
165                            // changed must be false. This prevents nodes from needlessly
166                            // being resolved again when their old values can be used
167                            // instead.
168                            changed: result != old_node.result,
169                            result,
170                            edges_from: Arc::new(resolver.edges_from.take()),
171                        }
172                    } else {
173                        // The old result is still valid so we just clone it.
174                        Node {
175                            result: old_node.result.clone(),
176                            edges_from: old_node.edges_from.clone(),
177                            changed: false,
178                        }
179                    }
180                }
181            } else {
182                // Since the old node is not resolved yet we will just resolve
183                // it from scratch.
184                let resolver = Arc::new(QueryResolver::new(self.clone()));
185                let result = self.resolver.resolve(q, resolver.clone());
186
187                Node {
188                    // We need to check again if the old node is still unresolved. Because
189                    // if it isn't we can set changed to old_result != result. Otherwise,
190                    // we always set changed to true.
191                    changed: match old.get() {
192                        Some(old_node) => result != old_node.result,
193                        None => true,
194                    },
195                    result,
196                    edges_from: Arc::new(resolver.edges_from.take()),
197                }
198            }
199        } else {
200            // Since the node isn't in the old map then the query is new and resolved
201            // from scratch.
202            let resolver = Arc::new(QueryResolver::new(self.clone()));
203            let result = self.resolver.resolve(q, resolver.clone());
204
205            Node {
206                result,
207                // Since this is a new node, changed is always false.
208                changed: false,
209                edges_from: Arc::new(resolver.edges_from.take()),
210            }
211        }
212    }
213
214    pub fn increment(self: &Arc<Self>, resolver: impl ResolveQuery<Q, R> + 'static) -> Arc<Self> {
215        Arc::new(Self {
216            new: Arc::new(ConcurrentMap::new()),
217            old: self.new.clone(),
218            resolver: Box::new(resolver),
219        })
220    }
221}
222
223pub struct QueryResolver<Q, R> {
224    graph: Arc<Graph<Q, R>>,
225    edges_from: RefCell<HashSet<Q>>,
226}
227
228unsafe impl<Q, R> Send for QueryResolver<Q, R> {}
229unsafe impl<Q, R> Sync for QueryResolver<Q, R> {}
230
231impl<Q: Clone + Eq + Hash + Send + Sync, R: Clone + Eq + Send + Sync> QueryResolver<Q, R> {
232    fn new(graph: Arc<Graph<Q, R>>) -> Self {
233        Self {
234            graph,
235            edges_from: RefCell::new(HashSet::new()),
236        }
237    }
238
239    pub fn query(&self, q: Q) -> R {
240        let result = self.graph.query(q.clone());
241        self.edges_from.borrow_mut().insert(q);
242        // TODO: edges_to (maybe?).
243        result
244    }
245}
246
247pub trait ResolveQuery<Q, R>: Send + Sync {
248    fn resolve(&self, q: Q, resolve: Arc<QueryResolver<Q, R>>) -> R;
249}