incremental_query/
context.rs

1use std::{
2    any::Any,
3    cell::RefCell,
4    collections::{HashMap, HashSet, VecDeque},
5    hash::Hash,
6    mem,
7    num::NonZeroUsize,
8    ops::Deref,
9};
10
11use siphasher::sip128::Hasher128;
12use tracing::Level;
13
14use crate::query::{QueryColor, QueryMode};
15
16use super::{
17    generation::Generation,
18    query::{Query, QueryInstance},
19    query_parameter::{QueryParameter, TypeErasedQueryParam},
20    storage::Storage,
21    QueryHasher,
22};
23
24// inspired by https://smallcultfollowing.com/babysteps/blog/2015/04/06/modeling-graphs-in-rust-using-vector-indices/
25struct Node<'cx, T> {
26    query_instance: QueryInstance<'cx, T>,
27
28    // 1-based indices in the edges array
29    first_edge_index: Option<NonZeroUsize>,
30    last_edge_index: Option<NonZeroUsize>,
31}
32
33#[derive(Copy, Clone)]
34struct Edge {
35    // index in the nodes array
36    node: usize,
37    // 1-based index in the edges array
38    next_edge: Option<NonZeroUsize>,
39}
40
41struct OutgoingEdgeIterator {
42    curr: Option<(Edge, NonZeroUsize)>,
43}
44
45impl OutgoingEdgeIterator {
46    fn next_node<T>(&mut self, cx: &Context<T>) -> Option<usize> {
47        self.next_node_inner(&cx.inner.borrow().edges)
48    }
49
50    fn next_node_inner(&mut self, edges: &[Edge]) -> Option<usize> {
51        let (Edge { node, next_edge }, _) = self.curr?;
52        self.curr = next_edge.map(|e| (edges[e.get()], e));
53        Some(node)
54    }
55
56    fn next_edge(&mut self, edges: &[Edge]) -> Option<(Edge, NonZeroUsize)> {
57        let edge = self.curr;
58        self.next_node_inner(edges);
59        edge
60    }
61}
62
63struct CycleDetection {
64    history: Vec<u128>,
65    lookup: HashSet<u128>,
66}
67const NO_HASHSET_LEN: usize = 10;
68
69impl CycleDetection {
70    pub fn new() -> Self {
71        Self {
72            history: Vec::new(),
73            lookup: HashSet::new(),
74        }
75    }
76
77    pub fn push_is_cycle(&mut self, input_hash: u128) -> bool {
78        if self.lookup.len() < NO_HASHSET_LEN {
79            let cycle = self.history.contains(&input_hash);
80            if !cycle {
81                self.history.push(input_hash);
82            }
83            cycle
84        } else {
85            if self.lookup.is_empty() && NO_HASHSET_LEN > 0 {
86                for &i in &self.history {
87                    self.lookup.insert(i);
88                }
89            }
90
91            let cycle = self.lookup.insert(input_hash);
92            if !cycle {
93                self.history.push(input_hash);
94            }
95
96            cycle
97        }
98    }
99
100    pub fn pop(&mut self) -> u128 {
101        let v = self.history.pop().expect("some value");
102        if !self.lookup.is_empty() && NO_HASHSET_LEN > 0 {
103            assert!(self.lookup.remove(&v));
104        }
105        v
106    }
107
108    fn history(&self) -> impl Iterator<Item = u128> {
109        self.history.clone().into_iter().rev()
110    }
111}
112
113struct Inner<'cx, T> {
114    // index in the nodes array
115    curr: Option<usize>,
116
117    nodes: Vec<Node<'cx, T>>,
118    edges: Vec<Edge>,
119
120    roots: Vec<u128>,
121
122    // map hash to node index
123    lookup: HashMap<u128, usize>,
124
125    cycle_detection: CycleDetection,
126
127    generation: Generation,
128
129    cache_enabled: bool,
130}
131
132impl<'cx, T> Inner<'cx, T> {
133    fn get_node(&self, node: usize) -> &Node<'cx, T> {
134        &self.nodes[node]
135    }
136    fn get_node_mut(&mut self, node: usize) -> &mut Node<'cx, T> {
137        &mut self.nodes[node]
138    }
139
140    fn add_edge_between(&mut self, src: usize, dst: usize) {
141        let src_node = &mut self.nodes[src];
142
143        let edge_index = NonZeroUsize::new(self.edges.len()).unwrap();
144        self.edges.push(Edge {
145            node: dst,
146            next_edge: None,
147        });
148
149        if src_node.first_edge_index.is_none() {
150            src_node.first_edge_index = Some(edge_index);
151            src_node.last_edge_index = Some(edge_index);
152        } else if let Some(ref mut i) = src_node.last_edge_index {
153            assert!(
154                self.edges[i.get()].next_edge.is_none(),
155                "should be none because it's the last in the chain"
156            );
157            self.edges[i.get()].next_edge = Some(edge_index);
158            *i = edge_index;
159        }
160    }
161
162    fn outgoing_edges(&self, node: usize) -> OutgoingEdgeIterator {
163        outgoing_edges::<T>(node, &self.nodes, &self.edges)
164    }
165}
166
167fn outgoing_edges<T>(node: usize, nodes: &[Node<T>], edges: &[Edge]) -> OutgoingEdgeIterator {
168    let node = &nodes[node];
169    let first_edge_index = node.first_edge_index;
170
171    OutgoingEdgeIterator {
172        curr: first_edge_index.map(|e| (edges[e.get()], e)),
173    }
174}
175
176/// The context is the most important part of incremental compilation,
177/// the structure through which queries are run and that caches query invocations.
178pub struct Context<'cx, T = ()> {
179    inner: RefCell<Inner<'cx, T>>,
180    pub storage: &'cx Storage,
181    pub data: T,
182}
183
184impl<T> Deref for Context<'_, T> {
185    type Target = T;
186
187    fn deref(&self) -> &Self::Target {
188        &self.data
189    }
190}
191
192impl<'cx> Context<'cx, ()> {
193    pub fn new(storage: &'cx Storage) -> Self {
194        Self::with_data(storage, ())
195    }
196}
197
198impl<'cx, T> Context<'cx, T> {
199    pub fn with_data(storage: &'cx Storage, data: T) -> Self {
200        Self {
201            storage,
202            inner: RefCell::new(Inner {
203                curr: None,
204                nodes: vec![],
205                edges: vec![
206                    // dummy edge because edge indexes are 1-based,
207                    // never referenced!
208                    Edge {
209                        node: 0,
210                        next_edge: None,
211                    },
212                ],
213                generation: Generation::new(),
214                lookup: HashMap::new(),
215                cycle_detection: CycleDetection::new(),
216                roots: Vec::new(),
217                cache_enabled: true,
218            }),
219            data,
220        }
221    }
222
223    pub fn set_cache_enabled(&self, enabled: bool) {
224        self.inner.borrow_mut().cache_enabled = enabled;
225    }
226
227    pub fn next_generation(&self) {
228        tracing::debug!("GENERATION {}", self.inner.borrow().generation.0);
229        self.inner.borrow_mut().generation.next();
230    }
231
232    /// Garbage collect the context. Deletes unreachable notes.
233    /// Reachability is evaluated based on "root" nodes.
234    ///
235    /// Some queries are run as part of another query in the query system,
236    /// I like to call those queries that run "inside" the query system.
237    /// Other queries are run from "outside" the query system after you
238    /// created a new [`Context`] and want to run your first query.
239    ///
240    /// These outside queries are considered root queries, and any node reachable
241    /// from them is kept alive by [`gc`](Context::gc).
242    ///
243    /// After this, all objects are allocated in `new_storage` and you can delete
244    /// the old backing storage to actually save space.
245    pub fn gc(self, new_storage: &Storage) -> Context<'_, T> {
246        let Inner {
247            curr,
248            mut nodes,
249            mut edges,
250            roots,
251            lookup,
252            cycle_detection,
253            generation,
254            cache_enabled,
255        } = self.inner.into_inner();
256
257        tracing::info!("old edges size: {}", edges.len());
258        tracing::info!("old nodes size: {}", nodes.len());
259        tracing::info!("old storage size: {}", self.storage.size());
260
261        let mut nodes_index_map = HashMap::new();
262        let mut edges_index_map = HashMap::new();
263        edges_index_map.insert(0, 0);
264        let mut todo = VecDeque::new();
265
266        for i in &roots {
267            if let Some(&node) = lookup.get(i) {
268                todo.push_back(node);
269            }
270        }
271        while let Some(old_idx) = todo.pop_front() {
272            let new_idx = nodes_index_map.len();
273            nodes_index_map.insert(old_idx, new_idx);
274
275            let mut outgoing = outgoing_edges::<T>(old_idx, &nodes, &edges);
276            while let Some(node) = outgoing.next_node_inner(&edges) {
277                todo.push_back(node);
278            }
279        }
280
281        let mut idx = 0;
282        nodes.retain_mut(|_| {
283            let retain = nodes_index_map.contains_key(&idx);
284            idx += 1;
285            retain
286        });
287
288        for node_idx in 0..nodes.len() {
289            let mut outgoing = outgoing_edges::<T>(node_idx, &nodes, &edges);
290            while let Some((edge, old_idx)) = outgoing.next_edge(&edges) {
291                let new_idx = edges_index_map.len();
292                edges_index_map.insert(old_idx.get(), new_idx);
293
294                edges[old_idx.get()].node = *nodes_index_map.get(&edge.node).unwrap();
295            }
296        }
297
298        let mut idx = 0;
299        edges.retain_mut(|_| {
300            let retain = edges_index_map.contains_key(&idx);
301            idx += 1;
302            retain
303        });
304
305        for node in &mut nodes {
306            if let Some(ref mut i) = node.last_edge_index {
307                *i = NonZeroUsize::new(*edges_index_map.get(&i.get()).unwrap()).unwrap();
308            }
309            if let Some(ref mut i) = node.first_edge_index {
310                *i = NonZeroUsize::new(*edges_index_map.get(&i.get()).unwrap()).unwrap();
311            }
312        }
313
314        for edge in &mut edges {
315            if let Some(ref mut i) = edge.next_edge {
316                *i = NonZeroUsize::new(*edges_index_map.get(&i.get()).unwrap()).unwrap();
317            }
318        }
319
320        let old_nodes = nodes;
321        let mut nodes = Vec::new();
322
323        for n in old_nodes {
324            nodes.push(Node {
325                query_instance: QueryInstance {
326                    input: n.query_instance.input.deep_clone(new_storage),
327                    output: n.query_instance.output.map(|i| i.deep_clone(new_storage)),
328
329                    run: n.query_instance.run,
330                    name: n.query_instance.name,
331                    mode: n.query_instance.mode,
332                    generation: n.query_instance.generation,
333                    output_hash: n.query_instance.output_hash,
334                    color: n.query_instance.color,
335                    transitively_has_always_dep: n.query_instance.transitively_has_always_dep,
336                },
337                first_edge_index: n.first_edge_index,
338                last_edge_index: n.last_edge_index,
339            });
340        }
341
342        tracing::info!("new edges size: {}", edges.len());
343        tracing::info!("new nodes size: {}", nodes.len());
344        tracing::info!("new storage size: {}", new_storage.size());
345
346        Context {
347            inner: RefCell::new(Inner {
348                curr,
349                nodes,
350                edges,
351                roots,
352                lookup,
353                cycle_detection,
354                generation,
355                cache_enabled,
356            }),
357            storage: new_storage,
358            data: self.data,
359        }
360    }
361
362    /// Returns the (approximate) size in bytes of the cache right now.
363    ///
364    /// The approximation is always at most wrong by a linear factor,
365    /// and can be used to determine when to garbage collect. However,
366    /// some fields that don't actually grow are not counted.
367    pub fn size(&self) -> usize {
368        let inner = self.inner.borrow();
369        let nodes_size = inner.nodes.len() * mem::size_of::<Node<T>>();
370        let edges_size = inner.edges.len() * mem::size_of::<Edge>();
371        let arena_size = self.storage.size();
372        let lookup_size = inner.lookup.len() * (mem::size_of::<usize>() + mem::size_of::<u128>());
373
374        nodes_size + edges_size + arena_size + lookup_size
375    }
376    // pub fn deserialize() {}
377
378    // pub fn serialize(&mut self, path: impl) {
379    //
380    // }
381
382    /// Used in macros
383    #[doc(hidden)]
384    pub fn hash<Q: Query<'cx, T>>(&self, query: Q, input: &impl QueryParameter) -> u128 {
385        let mut hasher = QueryHasher::new();
386        // also hash the query, so that paramters to different
387        // queries don't have hash collisions.
388        // THIS IS IMPORTANT FOR SAFETY!!
389        query.type_id().hash(&mut hasher);
390        input.hash_stable(&mut hasher);
391        hasher.finish128().as_u128()
392    }
393
394    /// Determines whether the node's output is actually up-to-date
395    /// and counts as a cache hit
396    fn cache_usable(&self, node: usize) -> bool {
397        {
398            let inner = self.inner.borrow();
399            let instance = &inner.get_node(node).query_instance;
400
401            tracing::debug!("check if {instance} is a hit");
402            match instance.mode {
403                QueryMode::Always => {
404                    tracing::debug!("miss (always)");
405                    // this dependency should always be rerun, so the cache is never usable
406                    return false;
407                }
408                QueryMode::Generation if inner.generation.is_newer_than(instance.generation) => {
409                    tracing::debug!("miss (new generation)");
410                    // this dependency should always be rerun, so the cache is never usable
411                    return false;
412                }
413                _ => {
414                    // otherwise, unsure, let's find out
415                }
416            }
417
418            // if the color is green, it's a guaranteed hit
419            // at least, if the generation changed just make sure that there
420            // aren't any dependent generational queries that used to return Green
421            // but over the generation really should have started to return Red.
422            // TODO: check if there's a always dep
423            if instance.color == QueryColor::Green
424                && !inner.generation.is_newer_than(instance.generation)
425                && !instance.transitively_has_always_dep
426            {
427                tracing::debug!("automatic hit");
428                return true;
429            }
430        }
431
432        // we need to rerun if any of the dependencies changed
433        // since the last time we ran this query.
434        // reasons dependencies can change:
435        // * the generation updated and a dependency is generation dependent
436        // * a dependency always has to rerun, and it produced a different result than before
437        // * any of this happening recursively
438        //
439        // Thus, for any query, we need to know if deep down it contains any always-changing
440        // dependencies, and generation-changing dependencies.
441        //
442        // Luckily, each dependent is itself a query, which knows for itself how to compute
443        // whether it has to be rerun through this same function recursively. The cases above
444        // are the base cases. Here, we can iterate over all the dependent queries and simply try
445        // to run them all.
446        let mut outgoing = self.inner.borrow().outgoing_edges(node);
447
448        while let Some(i) = outgoing.next_node(self) {
449            // when we run this query, the very first thing it does is call back into
450            // this function but one layer down. We can then hit either a base case or
451            // go through this recursive case again.
452            self.run_query_instance_of_node(i);
453
454            // after running (possibly a recompute), the result is either
455            // red or green. On red, we know that one of the dependencies
456            // changed and we need to recompute
457            match self.inner.borrow().get_node(i).query_instance.color {
458                QueryColor::Green => continue, // however, on green we can continue
459                QueryColor::Red => {
460                    tracing::debug!("cache miss");
461                    return false;
462                }
463                QueryColor::Unknown => {
464                    unreachable!()
465                }
466            }
467        }
468
469        tracing::debug!("cache hit all deps green");
470
471        // if all inputs were green, we can set this node to green too.
472        // it won't change (see Cache Promotion)
473        self.inner
474            .borrow_mut()
475            .get_node_mut(node)
476            .query_instance
477            .color = QueryColor::Green;
478
479        true
480    }
481
482    fn try_get_usable_cache(&self, node: usize) -> Option<TypeErasedQueryParam<'cx>> {
483        if self.cache_usable(node) {
484            // this possibly changed output we retrieve here.
485            self.inner.borrow().get_node(node).query_instance.output
486        } else {
487            None
488        }
489    }
490
491    fn is_cached(&self, input_hash: u128) -> Option<usize> {
492        self.inner.borrow().lookup.get(&input_hash).copied()
493    }
494
495    fn run_query_instance_of_node(&self, node: usize) -> TypeErasedQueryParam<'cx> {
496        {
497            let mut inner = self.inner.borrow_mut();
498            if let Some(old) = inner.curr {
499                // add an edge between what used to be the current node, and this one
500                // since we clearly depend on it.
501                // TODO: dedupe?
502                inner.add_edge_between(old, node);
503            }
504        }
505
506        if self.inner.borrow().cache_enabled {
507            // we got the id of a node. The node already contains an output. The question is,
508            // can we use it? If we can, return that output
509            if let Some(output) = self.try_get_usable_cache(node) {
510                // Unerase the pointer. We know it's type is Q::Output here
511                //
512                // Note: This relies on there not being a hash collision between a hash
513                // of an instance of Q::Output and some other T::Output
514                return output;
515            }
516        }
517
518        // otherwise, compute it
519        let (run, input, old_curr_node, _span) = {
520            let mut inner = self.inner.borrow_mut();
521
522            // temporarily make this the "current" node we're working one
523            // this is used to register what this query depends on while
524            // running it
525            let old_curr_node = inner.curr;
526            inner.curr = Some(node);
527            let node = inner.get_node_mut(node);
528            tracing::debug!("yeet dep list of {}", node.query_instance);
529            // reset the dependents list,
530            // since we're going to be rerunning the query
531            // and the dependents might change
532            node.first_edge_index = None;
533            node.last_edge_index = None;
534
535            let span = tracing::span!(Level::DEBUG, "", "{}", node.query_instance).entered();
536            tracing::debug!("run {}", node.query_instance);
537            (
538                node.query_instance.run,
539                node.query_instance.input,
540                old_curr_node,
541                span,
542            )
543        };
544
545        // run the actual query (through a pointer, since we don't know
546        // anything about the concrete query type here)
547        let (new_output, output_hash) = (run)(self, input, &|output_hash| {
548            let inner = self.inner.borrow();
549            let instance = &inner.get_node(node).query_instance;
550
551            // no need to reallocate if the hash remains the same, the old allocation works
552            instance.output.is_none() || output_hash != instance.output_hash
553        });
554
555        let mut inner = self.inner.borrow_mut();
556        inner.curr = old_curr_node;
557        // if the thing we just executed got marked as
558        // transitively having an always dep, then the parent also does.
559        if let Some(old_curr_node) = old_curr_node {
560            let has_transitive_dep = inner
561                .get_node(node)
562                .query_instance
563                .transitively_has_always_dep;
564
565            if inner.get_node(old_curr_node).query_instance.mode == QueryMode::Always {
566                inner
567                    .get_node_mut(old_curr_node)
568                    .query_instance
569                    .transitively_has_always_dep = true;
570            } else {
571                inner
572                    .get_node_mut(old_curr_node)
573                    .query_instance
574                    .transitively_has_always_dep = has_transitive_dep;
575            }
576        }
577
578        let generation = inner.generation;
579        let instance = &mut inner.get_node_mut(node).query_instance;
580
581        tracing::trace!("hash: {} => {}", instance.output_hash, output_hash);
582
583        // update the node based on the result
584        instance.color = if instance.output.is_none() {
585            assert!(
586                new_output.is_some(),
587                "should be some because of closure passed to run"
588            );
589            instance.output = new_output;
590            instance.output_hash = output_hash;
591            QueryColor::Red
592        } else if output_hash == instance.output_hash {
593            // if the hash of the output didn't change,
594            // then this query is all safe to use wherever you want.
595            QueryColor::Green
596        } else {
597            assert!(
598                new_output.is_some(),
599                "should be some because of closure passed to run"
600            );
601            instance.output = new_output;
602            instance.output_hash = output_hash;
603
604            QueryColor::Red
605        };
606        instance.generation = generation;
607
608        // TODO: can be unchecked, we know for sure this is Some
609        instance.output.unwrap()
610    }
611
612    #[doc(hidden)]
613    pub fn query<Q: Query<'cx, T>>(&self, query: Q, input: Q::Input) -> &'cx Q::Output {
614        let input_hash = self.hash(query, &input);
615        if self
616            .inner
617            .borrow_mut()
618            .cycle_detection
619            .push_is_cycle(input_hash)
620        {
621            let mut data = Vec::new();
622            let inner = self.inner.borrow();
623
624            if let Some(i) = inner.lookup.get(&input_hash) {
625                let instance = &inner.get_node(*i).query_instance;
626                data.push(format!("running query `{}` after running:", instance.name));
627            } else {
628                data.push(format!("running query `{}` after running:", Q::NAME));
629            }
630
631            let mut idx = 0;
632            for i in inner.cycle_detection.history() {
633                idx += 1;
634                if let Some(i) = inner.lookup.get(&i) {
635                    let instance = &inner.get_node(*i).query_instance;
636                    data.push(format!("{idx}. query `{}`", instance.name));
637                } else {
638                    data.push(format!("{idx}. query with hash `{i}`"));
639                }
640            }
641            panic!("cycle detected {}", data.join("\n"));
642        }
643
644        // if we call this query but no query was currently active
645        // then this is a kind of query root. Anything it references
646        // should stay alive, and if something is not referenced by
647        // a root anymore we can safely garbage collect it.
648        if self.inner.borrow().curr.is_none() {
649            self.inner.borrow_mut().roots.push(input_hash);
650        }
651
652        let node = if let Some(node) = self.is_cached(input_hash) {
653            node
654        } else {
655            // make a new node if we've not seen this query-input combination before.
656            // Nodes have a unique input hash. Input hashes also hash the query identifier.
657            // A new node represents a new (input, query) pair.
658
659            let input_ref = &*self.storage.alloc(input);
660            let query = QueryInstance {
661                run: Q::get_run_fn(),
662                name: Q::NAME,
663                mode: query.mode(),
664                generation: Generation::NULL,
665                color: QueryColor::Unknown,
666                input: TypeErasedQueryParam::new(input_ref),
667                output: None,
668                output_hash: 0,
669                transitively_has_always_dep: query.mode() == QueryMode::Always,
670            };
671
672            let mut this = self.inner.borrow_mut();
673            let node = this.nodes.len();
674            this.nodes.push(Node {
675                query_instance: query,
676                first_edge_index: None,
677                last_edge_index: None,
678            });
679            this.lookup.insert(input_hash, node);
680
681            node
682        };
683
684        // Safety: we run a type erased query instance, but here we know that the
685        // query type of the instance is Q
686        let res = unsafe { self.run_query_instance_of_node(node).get_ref() };
687        assert_eq!(self.inner.borrow_mut().cycle_detection.pop(), input_hash);
688        res
689    }
690}
691
692#[cfg(test)]
693mod tests {
694    use crate::context::NO_HASHSET_LEN;
695
696    use super::CycleDetection;
697
698    fn cycle(mut c: CycleDetection) {
699        assert!(!c.push_is_cycle(1));
700        assert!(!c.push_is_cycle(2));
701        assert!(!c.push_is_cycle(3));
702        assert!(c.push_is_cycle(1));
703        assert!(c.push_is_cycle(2));
704        assert!(c.push_is_cycle(3));
705        c.pop();
706        assert!(!c.push_is_cycle(3));
707    }
708
709    #[test]
710    fn test_cycle() {
711        let c = CycleDetection::new();
712        cycle(c);
713    }
714
715    #[test]
716    fn test_long() {
717        let mut c = CycleDetection::new();
718        for i in 0..(NO_HASHSET_LEN + 10) {
719            assert!(!c.push_is_cycle(i as u128));
720        }
721        assert!(c.push_is_cycle(1));
722        assert!(c.push_is_cycle(NO_HASHSET_LEN as u128 + 9));
723        c.pop();
724        assert!(!c.push_is_cycle(NO_HASHSET_LEN as u128 + 9));
725        for _ in 0..(NO_HASHSET_LEN + 9) {
726            c.pop();
727        }
728
729        cycle(c);
730    }
731
732    #[test]
733    #[should_panic]
734    fn test_empty() {
735        let mut c = CycleDetection::new();
736        c.pop();
737    }
738}