scopegraphs/resolve/
mod.rs

1//! This module contains code to query scope graphs.
2
3use scopegraphs_prust_lib::hashmap::HashSet as TrieSet;
4use std::collections::HashSet;
5use std::fmt::{Debug, Formatter};
6use std::hash::Hash;
7use std::marker::PhantomData;
8use std::rc::Rc;
9
10mod params;
11use crate::{Label, Scope, ScopeGraph};
12pub use params::*;
13use scopegraphs_regular_expressions::RegexMatcher;
14
15pub mod lookup;
16
17/// Representation of either a labeled edge or the special 'data' label.
18///
19/// Used to implement label orders. The `Data` label is there to support expressing preference
20/// between traversing an edge or resolving to the current node.
21// TODO: document this well, also with a `concepts` page about $ and labels
22//       @Aron could you do this?
23#[allow(missing_docs)]
24#[derive(Hash, PartialEq, Eq)]
25pub enum EdgeOrData<LABEL> {
26    Data,
27    Edge(LABEL),
28}
29
30impl<LABEL: Debug> Debug for EdgeOrData<LABEL> {
31    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
32        match self {
33            EdgeOrData::Data => write!(f, "$"),
34            EdgeOrData::Edge(lbl) => write!(f, "@{:?}", lbl),
35        }
36    }
37}
38
39// custom implementation not to impose LABEL: Copy
40impl<LABEL: Copy> Clone for EdgeOrData<LABEL> {
41    fn clone(&self) -> Self {
42        *self
43    }
44}
45
46impl<LABEL: Copy> Copy for EdgeOrData<LABEL> {}
47
48#[derive(Hash, PartialEq, Eq, Debug)]
49enum InnerPath<LABEL> {
50    Start {
51        source: Scope,
52    },
53    Step {
54        prefix: Path<LABEL>,
55        label: LABEL,
56        target: Scope,
57    },
58}
59
60/// Path (alternating sequence of scopes and labels) in a scope graph.
61#[derive(Clone)]
62pub struct Path<LABEL> {
63    inner_path: Rc<InnerPath<LABEL>>,
64    /// Set of all scopes in this path.
65    ///
66    /// Paths are alternating sequences of scopes and labels.
67    /// In scope graphs, paths may not be cyclic.
68    /// To check whether a possible _path extension_ is cyclic, we maintain a separate set of all scopes in a path.
69    /// The [`Path::step`] function will check whether the new scope is in this set, and only return an extended oath if this is not the case.
70    /// This is cheaper than traversing the [`Path::inner_path`], at the cost of some more memory usage.
71    ///
72    /// In order to make paths cheap to extend multiple times, we use a persistent data structure.
73    scopes: Rc<TrieSet<Scope>>,
74    // FIXME: put fields in same Arc
75}
76
77impl<LABEL> PartialEq for Path<LABEL>
78where
79    Scope: PartialEq,
80    LABEL: PartialEq,
81{
82    fn eq(&self, other: &Self) -> bool {
83        // `self.scopes` is determined by the `inner_path`, so no need to check for equality there.
84        self.inner_path == other.inner_path
85    }
86}
87
88impl<LABEL> Eq for Path<LABEL>
89where
90    Scope: Eq,
91    LABEL: Eq,
92{
93}
94
95impl<LABEL> Hash for Path<LABEL>
96where
97    Scope: Hash,
98    LABEL: Hash,
99{
100    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
101        // `self.scopes` is determined by the `inner_path`, so no need to separately hash it.
102        self.inner_path.hash(state);
103    }
104}
105
106impl<LABEL> Debug for Path<LABEL>
107where
108    LABEL: Debug,
109{
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        f.debug_struct("Path")
112            // `self.scopes` is determined by the `inner_path`, so no need to include it in the debug representation.
113            .field("inner_path", &self.inner_path)
114            .finish()
115    }
116}
117
118/// A path in the scope graph, including the data of the target of the path.
119#[derive(Hash, PartialEq, Eq, Debug)]
120pub struct ResolvedPath<'sg, LABEL, DATA> {
121    path: Path<LABEL>,
122    pub(crate) data: &'sg DATA,
123}
124
125impl<LABEL: Clone, DATA> Clone for ResolvedPath<'_, LABEL, DATA> {
126    fn clone(&self) -> Self {
127        Self {
128            path: self.path.clone(),
129            data: self.data,
130        }
131    }
132}
133
134impl<LABEL, DATA> ResolvedPath<'_, LABEL, DATA> {
135    /// Get the full path of scopes leading to this target scope
136    pub fn path(&self) -> &Path<LABEL> {
137        &self.path
138    }
139
140    /// Get the data on this target scope.
141    pub fn data(&self) -> &DATA {
142        self.data
143    }
144}
145
146impl<LABEL> Path<LABEL> {
147    /// Creates a new path that contains of a single scope.
148    pub fn new(source: Scope) -> Self {
149        Self {
150            inner_path: Rc::new(InnerPath::Start { source }),
151            scopes: Rc::new(TrieSet::new().insert(source)),
152        }
153    }
154
155    /// Returns the last scope in the path.
156    pub fn target(&self) -> Scope {
157        match self.inner_path.as_ref() {
158            InnerPath::Start { source } => *source,
159            InnerPath::Step { target, .. } => *target,
160        }
161    }
162
163    /// Extends the path with a new edge.
164    ///
165    /// Returns `None` if the resulting path would contain a cycle.
166    pub fn step(&self, label: LABEL, target: Scope) -> Option<Self> {
167        if self.scopes.search(&target) {
168            None
169        } else {
170            Some(Self {
171                inner_path: Rc::new(InnerPath::Step {
172                    prefix: Self {
173                        inner_path: self.inner_path.clone(),
174                        scopes: self.scopes.clone(),
175                    },
176                    label,
177                    target,
178                }),
179                scopes: Rc::new(self.scopes.insert(target)),
180            })
181        }
182    }
183
184    /// Creates a resolved path from this path.
185    pub fn resolve<DATA>(self, data: &DATA) -> ResolvedPath<'_, LABEL, DATA> {
186        ResolvedPath { path: self, data }
187    }
188}
189
190/// Representation of an environment (i.e., a collection of resolved paths).
191// For now, we stick with hashmaps because they are easy.
192// We might however want to change that in the future, because:
193// - we currently create a lot of new hashmaps, which is not really efficient
194// - efficiency might be dependent on the name resolution (shadowing) strategy
195// - we (not always necessarily) clone hashmaps often
196// Perhaps we will resort to fibbonacy heaps/pairing heaps, and/or make resolution parametric in the environment type.
197#[derive(Debug)]
198pub struct Env<'sg, LABEL: 'sg, DATA>(HashSet<ResolvedPath<'sg, LABEL, DATA>>);
199
200impl<'a, 'sg, LABEL, DATA> IntoIterator for &'a Env<'sg, LABEL, DATA> {
201    type Item = &'a ResolvedPath<'sg, LABEL, DATA>;
202
203    type IntoIter = <&'a HashSet<ResolvedPath<'sg, LABEL, DATA>> as IntoIterator>::IntoIter;
204
205    fn into_iter(self) -> Self::IntoIter {
206        self.0.iter()
207    }
208}
209
210impl<'sg, LABEL, DATA> IntoIterator for Env<'sg, LABEL, DATA> {
211    type Item = ResolvedPath<'sg, LABEL, DATA>;
212
213    type IntoIter = <HashSet<ResolvedPath<'sg, LABEL, DATA>> as IntoIterator>::IntoIter;
214
215    fn into_iter(self) -> Self::IntoIter {
216        self.0.into_iter()
217    }
218}
219
220impl<LABEL, DATA> Default for Env<'_, LABEL, DATA> {
221    fn default() -> Self {
222        Self::new()
223    }
224}
225
226impl<'sg, LABEL, DATA> Env<'sg, LABEL, DATA> {
227    /// Creates an empty environemnt.
228    pub fn new() -> Self {
229        Self(HashSet::new())
230    }
231
232    /// Create an iterator over all paths in the environment.
233    pub fn iter<'a>(&'a self) -> impl Iterator<Item = &'a ResolvedPath<'sg, LABEL, DATA>> + 'a {
234        self.0.iter()
235    }
236
237    /// Returns `true` is the environment is empty, `false` otherwise.
238    pub fn is_empty(&self) -> bool {
239        self.0.is_empty()
240    }
241}
242
243/// Error emitted by [Env::get_only_item] when the environment argument did not contain exactly one argument.
244pub enum OnlyElementError<'a, 'sg, DATA, LABEL, I>
245where
246    I: Iterator<Item = &'a ResolvedPath<'sg, DATA, LABEL>>,
247{
248    /// Environment was empty
249    Empty,
250    /// Environment contained multiple items
251    Multiple {
252        /// the first element that the iterator returned
253        first: &'a ResolvedPath<'sg, DATA, LABEL>,
254        /// the second element the iterator returned (witnessing the environment is not a singleton environment)
255        second: &'a ResolvedPath<'sg, DATA, LABEL>,
256        /// the iterator (can be used to access the remaining elements)
257        rest: I,
258    },
259}
260
261impl<'a, 'sg, DATA, LABEL, I> Debug for OnlyElementError<'a, 'sg, DATA, LABEL, I>
262where
263    I: Iterator<Item = &'a ResolvedPath<'sg, DATA, LABEL>>,
264{
265    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
266        match self {
267            OnlyElementError::Empty => write!(f, "OnlyElementError::Empty"),
268            OnlyElementError::Multiple { .. } => {
269                write!(f, "OnlyElementError::Multiple {{..}}")
270            }
271        }
272    }
273}
274
275impl<'a, 'sg, DATA, LABEL, I> IntoIterator for OnlyElementError<'a, 'sg, DATA, LABEL, I>
276where
277    I: Iterator<Item = &'a ResolvedPath<'sg, DATA, LABEL>>,
278{
279    type Item = &'a ResolvedPath<'sg, DATA, LABEL>;
280    type IntoIter = OnlyElementErrorIter<'a, 'sg, DATA, LABEL, I>;
281
282    fn into_iter(self) -> Self::IntoIter {
283        OnlyElementErrorIter { e: self, offset: 0 }
284    }
285}
286
287/// Iterator over an [`OnlyElementError`], to easily access its elements.
288pub struct OnlyElementErrorIter<'a, 'sg, DATA, LABEL, I>
289where
290    I: Iterator<Item = &'a ResolvedPath<'sg, DATA, LABEL>>,
291{
292    e: OnlyElementError<'a, 'sg, DATA, LABEL, I>,
293    offset: usize,
294}
295
296impl<'a, 'sg, DATA, LABEL, I> Iterator for OnlyElementErrorIter<'a, 'sg, DATA, LABEL, I>
297where
298    I: Iterator<Item = &'a ResolvedPath<'sg, DATA, LABEL>>,
299{
300    type Item = &'a ResolvedPath<'sg, DATA, LABEL>;
301
302    fn next(&mut self) -> Option<Self::Item> {
303        match &mut self.e {
304            OnlyElementError::Empty => None,
305            OnlyElementError::Multiple {
306                first,
307                second,
308                rest,
309            } => match self.offset {
310                0 => {
311                    self.offset += 1;
312                    Some(first)
313                }
314                1 => {
315                    self.offset += 1;
316                    Some(second)
317                }
318                _ => rest.next(),
319            },
320        }
321    }
322}
323
324impl<'sg, LABEL, DATA> Env<'sg, LABEL, DATA>
325where
326    ResolvedPath<'sg, LABEL, DATA>: Eq + Hash + Clone,
327{
328    /// Create an environment with a single path.
329    pub fn single(path: ResolvedPath<'sg, LABEL, DATA>) -> Self {
330        let mut env = Env::new();
331        env.insert(path);
332        env
333    }
334
335    /// Insert a path in the environment.
336    pub fn insert(&mut self, path: ResolvedPath<'sg, LABEL, DATA>) {
337        self.0.insert(path);
338    }
339
340    /// Add all paths in `other` to the current environment.
341    pub fn merge(&mut self, other: &Self) {
342        self.0.extend(other.0.iter().cloned())
343    }
344
345    /// Returns `Ok(value)` if the environment only has a single resolved path, or an error otherwise.
346    #[allow(clippy::type_complexity)]
347    pub fn get_only_item<'a>(
348        &'a self,
349    ) -> Result<
350        ResolvedPath<'sg, LABEL, DATA>,
351        OnlyElementError<
352            'a,
353            'sg,
354            LABEL,
355            DATA,
356            impl Iterator<Item = &'a ResolvedPath<'sg, LABEL, DATA>> + 'a,
357        >,
358    > {
359        let mut iter = self.iter();
360        iter.next().map_or(Err(OnlyElementError::Empty), |value| {
361            iter.next().map_or_else(
362                || Ok(value.clone()),
363                |second| {
364                    Err(OnlyElementError::Multiple {
365                        first: value,
366                        second,
367                        rest: iter,
368                    })
369                },
370            )
371        })
372    }
373}
374
375impl<'sg, LABEL: 'sg, DATA> FromIterator<ResolvedPath<'sg, LABEL, DATA>> for Env<'sg, LABEL, DATA>
376where
377    ResolvedPath<'sg, LABEL, DATA>: Eq + Hash,
378{
379    fn from_iter<T: IntoIterator<Item = ResolvedPath<'sg, LABEL, DATA>>>(iter: T) -> Self {
380        Env(HashSet::from_iter(iter))
381    }
382}
383
384impl<'sg, LABEL: 'sg, DATA: Hash> FromIterator<Env<'sg, LABEL, DATA>> for Env<'sg, LABEL, DATA>
385where
386    ResolvedPath<'sg, LABEL, DATA>: Eq + Hash,
387{
388    fn from_iter<T: IntoIterator<Item = Env<'sg, LABEL, DATA>>>(iter: T) -> Self {
389        iter.into_iter().flatten().collect()
390    }
391}
392
393impl<'sg, LABEL: 'sg + Clone, DATA> Clone for Env<'sg, LABEL, DATA> {
394    fn clone(&self) -> Self {
395        Env(self.0.clone())
396    }
397}
398
399/// A trait that represents a specific name resolution algorithm.
400///
401/// Currently has one implementor: [`Query`].
402pub trait Resolve<'sg, 'rslv> {
403    /// The type of result that this query gives.
404    /// This depends on the [completeness strategy](crate::completeness::Completeness) used.
405    ///
406    /// * Using [`ImplicitClose`](crate::completeness::ImplicitClose) this is a simply a `Vec<Scope>`.
407    ///   Querying using this completeness strategy cannot fail.
408    /// * Using [`ExplicitClose`](crate::completeness::ExplicitClose) this is a [`EdgesOrDelay<Scope, LABEL>`](crate::completeness::EdgesOrDelay).
409    ///
410    /// Querying can fail, because a scope this query traverses wasn't closed yet.
411    ///
412    /// Using [`FutureCompleteness`](crate::completeness::FutureCompleteness), this is a [`Future`](std::future::Future).
413    ///
414    /// Querying can pend, because a scope this query traverses wasn't closed yet.
415    type EnvContainer
416    where
417        'sg: 'rslv,
418        Self: 'rslv;
419
420    /// actually run this query
421    fn resolve(&'rslv self, scope: Scope) -> Self::EnvContainer;
422}
423
424/// A query over a scope graph. Read more [here](crate::concepts)
425pub struct Query<'storage, 'sg, 'rslv, LABEL: Label, DATA, CMPL, PWF, DWF, LO, DEq> {
426    _phantom: PhantomData<&'rslv ()>,
427    scope_graph: &'sg ScopeGraph<'storage, LABEL, DATA, CMPL>,
428    path_wellformedness: PWF,
429    data_wellformedness: DWF,
430    label_order: LO,
431    data_equivalence: DEq,
432}
433
434impl<'sg, 'storage, 'rslv, LABEL: Label, DATA, CMPL, PWF, DWF, LO, DEq>
435    Query<'sg, 'storage, 'rslv, LABEL, DATA, CMPL, PWF, DWF, LO, DEq>
436{
437    /// Add a [path well-formedness](crate::concepts::path_wellformedness) to this query.
438    ///
439    /// A path well-formedness can be specified using a regular expression.
440    /// Often you want to use [`query_regex!`](crate::query_regex) here.
441    ///
442    /// ```rust
443    /// # use scopegraphs::{query_regex, ScopeGraph, Storage};
444    /// # use scopegraphs::completeness::UncheckedCompleteness;
445    /// # use scopegraphs::Label;
446    /// # let storage = Storage::new();
447    /// # let scopegraph = ScopeGraph::<Lbl, (), _>::new(&storage, unsafe{UncheckedCompleteness::new()});
448    ///
449    /// #[derive(Label, Eq, PartialEq, Copy, Clone)]
450    /// pub enum Lbl {
451    ///     Lexical,
452    ///     Definition
453    /// }
454    /// use Lbl::*;
455    ///
456    /// scopegraph.query()
457    ///     .with_path_wellformedness(query_regex!(Lbl: Lexical* Definition));
458    ///     
459    /// ```
460    ///
461    pub fn with_path_wellformedness<NPWF>(
462        self,
463        new_path_wellformedness: NPWF,
464    ) -> Query<'sg, 'storage, 'rslv, LABEL, DATA, CMPL, NPWF, DWF, LO, DEq>
465    where
466        NPWF: for<'a> RegexMatcher<&'a LABEL> + 'rslv,
467    {
468        Query {
469            _phantom: PhantomData,
470            scope_graph: self.scope_graph,
471            path_wellformedness: new_path_wellformedness,
472            data_wellformedness: self.data_wellformedness,
473            label_order: self.label_order,
474            data_equivalence: self.data_equivalence,
475        }
476    }
477
478    /// Add a [data well-formedness](crate::concepts::data_wellformedness) to this query.
479    /// A data well-formedness must implement [`DataWellformedness`].
480    ///
481    /// Defaults to [`DefaultDataWellformedness`], considering all data to be well-formed.
482    ///
483    /// With a data well-formedness you can specify what data a scope must have to be a valid
484    /// target for this query.
485    ///
486    /// ```rust
487    /// # use scopegraphs::{query_regex, ScopeGraph, Storage};
488    /// # use scopegraphs::completeness::UncheckedCompleteness;
489    /// # use scopegraphs::Label;
490    /// # let storage = Storage::new();
491    /// # let scopegraph = ScopeGraph::<(), MyData, _>::new(&storage, unsafe{UncheckedCompleteness::new()});
492    /// use scopegraphs::resolve::DataWellformedness;
493    ///
494    /// struct MyData {
495    ///     is_good: bool
496    /// }
497    /// struct MyDataWellformedness;
498    /// impl<'sg> DataWellformedness<'sg, MyData> for MyDataWellformedness {
499    ///     type Output = bool;
500    ///     fn data_wf(&self, data: &'sg MyData) -> bool {
501    ///         data.is_good
502    ///     }
503    /// }
504    ///
505    /// scopegraph.query()
506    ///     .with_data_wellformedness(MyDataWellformedness);
507    ///
508    /// ```
509    ///
510    /// A data-wellformedness can be a lambda that takes a reference to `DATA` and returns a boolean.
511    ///
512    /// ```rust
513    /// # use scopegraphs::{query_regex, ScopeGraph, Storage};
514    /// # use scopegraphs::completeness::UncheckedCompleteness;
515    /// # use scopegraphs::Label;
516    /// # let storage = Storage::new();
517    /// # let scopegraph = ScopeGraph::<(), MyData, _>::new(&storage, unsafe{UncheckedCompleteness::new()});
518    /// use scopegraphs::resolve::DataWellformedness;
519    ///
520    /// struct MyData {
521    ///     is_good: bool
522    /// }
523    /// scopegraph.query()
524    ///     .with_data_wellformedness(|data: &MyData| data.is_good);
525    ///
526    /// ```
527    pub fn with_data_wellformedness<NDWF>(
528        self,
529        new_data_wellformedness: NDWF,
530    ) -> Query<'sg, 'storage, 'rslv, LABEL, DATA, CMPL, PWF, NDWF, LO, DEq>
531    where
532        NDWF: DataWellformedness<'sg, DATA> + 'rslv,
533    {
534        Query {
535            _phantom: PhantomData,
536            scope_graph: self.scope_graph,
537            path_wellformedness: self.path_wellformedness,
538            data_wellformedness: new_data_wellformedness,
539            label_order: self.label_order,
540            data_equivalence: self.data_equivalence,
541        }
542    }
543
544    /// Add a [label order](crate::concepts::label_ordering) to this query.
545    /// A label order must implement [`LabelOrder`].
546    ///
547    /// Defaults to [`DefaultLabelOrder`], considering all labels of equal importance.
548    ///
549    /// With a label order, you can specify which labels are "more important" in a query.
550    /// Specify label orders using the [`label_order!`](crate::label_order) macro.
551    /// TODO: lower is better? from Lace.
552    ///
553    /// ```rust
554    /// # use scopegraphs::{query_regex, ScopeGraph, Storage};
555    /// # use scopegraphs::completeness::UncheckedCompleteness;
556    /// # use scopegraphs::Label;
557    /// # let storage = Storage::new();
558    /// # let scopegraph = ScopeGraph::<Lbl, (), _>::new(&storage, unsafe{UncheckedCompleteness::new()});
559    /// use scopegraphs_macros::label_order;
560    ///
561    /// #[derive(Label, Copy, Clone, PartialEq, Eq)]
562    /// pub enum Lbl {
563    ///     Lexical,
564    ///     Definition
565    /// }
566    /// use Lbl::*;
567    ///
568    /// scopegraph.query()
569    ///     .with_label_order(label_order!(Lbl: Lexical < Definition));
570    /// ```
571    pub fn with_label_order<NLO>(
572        self,
573        new_label_order: NLO,
574    ) -> Query<'sg, 'storage, 'rslv, LABEL, DATA, CMPL, PWF, DWF, NLO, DEq>
575    where
576        NLO: LabelOrder<LABEL> + 'rslv,
577    {
578        Query {
579            _phantom: PhantomData,
580            scope_graph: self.scope_graph,
581            path_wellformedness: self.path_wellformedness,
582            data_wellformedness: self.data_wellformedness,
583            label_order: new_label_order,
584            data_equivalence: self.data_equivalence,
585        }
586    }
587
588    /// Add a [data equivalence](crate::concepts::data_equivalence) to this query.
589    /// A data equivalence must implement [`DataEquivalence`].
590    ///
591    /// TODO: example (@aron?)
592    pub fn with_data_equivalence<NDEq>(
593        self,
594        new_data_equivalence: NDEq,
595    ) -> Query<'sg, 'storage, 'rslv, LABEL, DATA, CMPL, PWF, DWF, LO, NDEq>
596    where
597        NDEq: DataEquivalence<'sg, DATA> + 'rslv,
598    {
599        Query {
600            _phantom: PhantomData,
601            scope_graph: self.scope_graph,
602            path_wellformedness: self.path_wellformedness,
603            data_wellformedness: self.data_wellformedness,
604            label_order: self.label_order,
605            data_equivalence: new_data_equivalence,
606        }
607    }
608}
609
610impl<'storage, LABEL: Label, DATA, CMPL> ScopeGraph<'storage, LABEL, DATA, CMPL> {
611    /// Build a query over the scope graph.
612    pub fn query<'rslv>(
613        &'rslv self,
614    ) -> Query<
615        'storage,
616        'rslv,
617        'rslv, // TODO: remove(???)
618        LABEL,
619        DATA,
620        CMPL,
621        (),
622        DefaultDataWellformedness,
623        DefaultLabelOrder,
624        DefaultDataEquivalence,
625    >
626    where
627        'storage: 'rslv,
628    {
629        Query {
630            _phantom: PhantomData,
631            scope_graph: self,
632            path_wellformedness: (),
633            data_wellformedness: DefaultDataWellformedness::default(),
634            label_order: DefaultLabelOrder::default(),
635            data_equivalence: DefaultDataEquivalence::default(),
636        }
637    }
638}