substrait_validator/output/extension/
namespace.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Module for representing namespaces used within extension definitions.
4
5use crate::output::diagnostic;
6use crate::output::extension;
7use crate::parse::context;
8use crate::parse::traversal;
9use std::collections::HashMap;
10use std::sync::Arc;
11
12/// Reference to a nested namespace structure.
13pub type Reference<T> = Arc<Definition<T>>;
14
15/// Nested namespace structure.
16///
17/// There are a lot of things "weird" about this structure, because Substrait
18/// is defined horribly; everything is a valid identifier (including
19/// identifiers with periods in them, so a period is ambiguous between a
20/// namespace separator and just part of an identifier), matching is
21/// case-insensitive (but for the validator we do want to store what the user's
22/// case convention was for a definition), and for type variations names
23/// need not even be unique (arguably there's a different variation namespace
24/// for each type class, but the type class isn't yet known when a type
25/// variation anchor is defined, and that's where we want to resolve the name;
26/// see <https://github.com/substrait-io/substrait/issues/268>).
27///
28/// So here we have an abomination of a namespace system that allows for name
29/// conflicts and period ambiguity and just tries to deal with it as cleanly as
30/// it can.
31#[derive(Debug)]
32pub struct Definition<T> {
33    /// Members of the namespace. The key is stored in lowercase.
34    members: HashMap<String, Vec<InternalReference<T>>>,
35}
36
37impl<T> Clone for Definition<T> {
38    fn clone(&self) -> Self {
39        Self {
40            members: self.members.clone(),
41        }
42    }
43}
44
45impl<T> Default for Definition<T> {
46    fn default() -> Self {
47        Self {
48            members: Default::default(),
49        }
50    }
51}
52
53impl<T> Definition<T> {
54    /// Creates a new, empty namespace.
55    pub fn new() -> Self {
56        Default::default()
57    }
58
59    /// Defines an item with the given (case-insensitive) name. If an item
60    /// going by a conflicting name already existed, the new item is inserted
61    /// anyway; if the caller is concerned about that, it must first resolve
62    /// the name and throw a warning or error based on its result.
63    pub fn define_item<S: AsRef<str>>(&mut self, name: S, item: Arc<T>, public: bool) {
64        let name = name.as_ref();
65        self.members
66            .entry(name.to_ascii_lowercase())
67            .or_default()
68            .push(InternalReference {
69                member: Arc::new(Member::Item(item)),
70                original_name: name.to_string(),
71                public,
72            })
73    }
74
75    /// Registers a nested namespace with the given (case-insensitive) name.
76    /// Same rules for duplicates as define_item(). If None is specified for
77    /// item, the namespace reference is registered, but its contents are
78    /// left unspecified.
79    pub fn define_nested<S: AsRef<str>>(
80        &mut self,
81        name: S,
82        item: Option<Reference<T>>,
83        public: bool,
84    ) {
85        let name = name.as_ref();
86        self.members
87            .entry(name.to_ascii_lowercase())
88            .or_default()
89            .push(InternalReference {
90                member: Arc::new(if let Some(item) = item {
91                    Member::Nested(item)
92                } else {
93                    Member::UnresolvedNested
94                }),
95                original_name: name.to_string(),
96                public,
97            })
98    }
99
100    /// Helper function that determines whether a reference is visible, based
101    /// on:
102    ///  - whether this is a local lookup (private members visible) or an
103    ///    external lookup (private members not visible);
104    ///  - whether we had to traverse into namespaces in order to get to the
105    ///    reference we're looking at;
106    ///  - whether all of the above namespaces were public;
107    ///  - whether the reference we're looking at is public.
108    fn visibility(
109        local: bool,
110        with_prefix: bool,
111        prefix_public: bool,
112        reference_public: bool,
113    ) -> bool {
114        // Clippy doesn't like this, but the logic is hard enough to understand
115        // without being buried in a big boolean expression.
116        #[allow(clippy::if_same_then_else)]
117        #[allow(clippy::needless_bool)]
118        if local && !with_prefix {
119            // Everything defined locally is visible.
120            true
121        } else if (prefix_public || !with_prefix) && reference_public {
122            // If all references in the path are public, the item is visible.
123            true
124        } else {
125            // Otherwise, the item is not visible.
126            false
127        }
128    }
129
130    /// Helper function for resolve. Resolves all references defined in the
131    /// current namespace and its children recursively.
132    ///
133    ///  - target: the result object that matching elements get pushed into.
134    ///  - local: whether this is a local lookup (i.e. private members of
135    ///    the root namespace are visible).
136    ///  - prefix: if specified, the concatenated original names of the
137    ///    parent namespaces that were already recursed into.
138    ///  - suffix: the name that we need to resolve locally.
139    ///  - prefix_public: whether the prefix is public or private. Should be
140    ///    true if there is no prefix.
141    fn resolve_internal(
142        &self,
143        target: &mut ResolutionResult<T>,
144        local: bool,
145        prefix: Option<&str>,
146        suffix: &str,
147        prefix_public: bool,
148    ) {
149        // Resolve locally-defined elements.
150        if let Some(references) = self.members.get(&suffix.to_ascii_lowercase()) {
151            for reference in references {
152                let original_name = if let Some(prefix) = prefix {
153                    format!("{prefix}.{}", reference.original_name)
154                } else {
155                    reference.original_name.clone()
156                };
157                let visible =
158                    Self::visibility(local, prefix.is_some(), prefix_public, reference.public);
159                let target_vec = if visible {
160                    &mut target.visible
161                } else {
162                    &mut target.invisible
163                };
164                target_vec.push((original_name, reference.member.clone()))
165            }
166        }
167
168        // If the suffix has periods in it, try splitting at every period it
169        // has and seeing if that prefix resolves to any nested namespace.
170        for (split_point, _) in suffix.char_indices().filter(|(_, c)| *c == '.') {
171            let namespace_name = &suffix[..split_point];
172            let new_suffix = &suffix[split_point + 1..];
173            if let Some(references) = self.members.get(&namespace_name.to_ascii_lowercase()) {
174                for reference in references {
175                    match reference.member.as_ref() {
176                        Member::Item(_) => (),
177                        Member::Nested(nested) => {
178                            let new_prefix = if let Some(prefix) = prefix {
179                                format!("{prefix}.{}", reference.original_name)
180                            } else {
181                                reference.original_name.clone()
182                            };
183                            nested.resolve_internal(
184                                target,
185                                local,
186                                Some(&new_prefix),
187                                new_suffix,
188                                prefix_public && reference.public,
189                            );
190                        }
191                        Member::UnresolvedNested => {
192                            let visible = Self::visibility(
193                                local,
194                                prefix.is_some(),
195                                prefix_public,
196                                reference.public,
197                            );
198                            if visible {
199                                target.visible_incomplete = true;
200                            } else {
201                                target.invisible_incomplete = true;
202                            };
203                        }
204                    }
205                }
206            }
207        }
208    }
209
210    /// Resolves a name to all items with the same name visible from within this
211    /// namespace (so, including private items).
212    pub fn resolve_local<R>(&self, reference: R) -> ResolutionResult<T>
213    where
214        R: Into<extension::reference::Data<T>>,
215    {
216        let reference = reference.into();
217        let name = reference.name.name().unwrap_or("!").to_string();
218        let mut result = ResolutionResult::new(reference);
219        self.resolve_internal(&mut result, true, None, &name, true);
220        result
221    }
222
223    /// Resolves a name to all items with the same name visible from outside
224    /// this namespace (so, excluding private items).
225    pub fn resolve_public<R>(&self, reference: R) -> ResolutionResult<T>
226    where
227        R: Into<extension::reference::Data<T>>,
228    {
229        let reference = reference.into();
230        let name = reference.name.name().unwrap_or("!").to_string();
231        let mut result = ResolutionResult::new(reference);
232        self.resolve_internal(&mut result, false, None, &name, true);
233        result
234    }
235}
236
237/// Reference to a namespace member.
238#[derive(Debug)]
239struct InternalReference<T> {
240    /// The member that is being referred to.
241    pub member: Arc<Member<T>>,
242
243    /// The name of the member using its original case convention.
244    pub original_name: String,
245
246    /// Whether this member has public visibility.
247    pub public: bool,
248}
249
250impl<T> Clone for InternalReference<T> {
251    fn clone(&self) -> Self {
252        Self {
253            member: self.member.clone(),
254            original_name: self.original_name.clone(),
255            public: self.public,
256        }
257    }
258}
259
260/// A namespace member.
261#[derive(Debug)]
262enum Member<T> {
263    /// A leaf item.
264    Item(Arc<T>),
265
266    /// A nested namespace.
267    Nested(Reference<T>),
268
269    /// A nested namespace of which the contents were not resolved.
270    UnresolvedNested,
271}
272
273impl<T> Clone for Member<T> {
274    fn clone(&self) -> Self {
275        match self {
276            Self::Item(x) => Self::Item(x.clone()),
277            Self::Nested(x) => Self::Nested(x.clone()),
278            Self::UnresolvedNested => Self::UnresolvedNested,
279        }
280    }
281}
282
283impl<T> Member<T> {
284    /// Returns the embedded item if this is an item.
285    pub fn as_item(&self) -> Option<Arc<T>> {
286        match self {
287            Member::Item(x) => Some(x.clone()),
288            _ => None,
289        }
290    }
291
292    /// Returns the embedded potentially-unresolved namespace if this is a
293    /// namespace.
294    pub fn as_namespace(&self) -> Option<Option<Reference<T>>> {
295        match self {
296            Member::Nested(x) => Some(Some(x.clone())),
297            Member::UnresolvedNested => Some(None),
298            _ => None,
299        }
300    }
301}
302
303/// Result structure for a namespace resolution.
304#[derive(Clone, Debug)]
305pub struct ResolutionResult<T> {
306    /// The reference that was being resolved.
307    unresolved_reference: extension::reference::Data<T>,
308
309    /// Members that are visible from the point where the resolution was
310    /// performed.
311    visible: Vec<(String, Arc<Member<T>>)>,
312
313    /// Whether there might be more visible members, defined within
314    /// unresolved namespaces.
315    visible_incomplete: bool,
316
317    /// Members that are not visible from the point where the resolution was
318    /// performed. These are enumerated nonetheless to improve the quality of
319    /// error messages.
320    invisible: Vec<(String, Arc<Member<T>>)>,
321
322    /// Whether there might be more invisible members, defined within
323    /// unresolved namespaces.
324    invisible_incomplete: bool,
325
326    /// Whether a filter has been applied and visible items were removed by
327    /// it.
328    filtered: bool,
329}
330
331impl<T> std::fmt::Display for ResolutionResult<T> {
332    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
333        write!(f, "{}", self.unresolved_reference)?;
334        if self.visible_incomplete {
335            match self.visible.len() {
336                0 => write!(
337                    f,
338                    " (no known matching definitions, namespace not resolved)"
339                ),
340                1 => write!(f, " (one or more matching definitions)"),
341                n => write!(f, " ({n} or more matching definitions)"),
342            }
343        } else {
344            match self.visible.len() {
345                0 => write!(f, " (no matching definitions)"),
346                1 => write!(f, " (one matching definition)"),
347                n => write!(f, " ({n} matching definitions)"),
348            }
349        }
350    }
351}
352
353impl<T> ResolutionResult<T> {
354    /// Creates a new, empty resolution result for the given unresolved
355    /// reference, to be used as a placeholder when no namespace is actually
356    /// available. It will behave as if resolution failed because the namespace
357    /// being looked in was itself not resolved. If an item is passed via the
358    /// reference, it will be returned by as_item(). as_namespace() will return
359    /// None.
360    pub fn new(unresolved_reference: extension::reference::Data<T>) -> Self {
361        Self {
362            unresolved_reference,
363            visible: vec![],
364            visible_incomplete: true,
365            invisible: vec![],
366            invisible_incomplete: false,
367            filtered: false,
368        }
369    }
370
371    /// Internal helper for the various filter functions.
372    fn filter<F: Fn(&(String, Arc<Member<T>>)) -> bool>(mut self, filter: F) -> Self {
373        let size = self.visible.len();
374        self.visible.retain(&filter);
375        self.invisible.retain(&filter);
376        if self.visible.len() < size {
377            self.filtered = true;
378        }
379        self
380    }
381
382    /// Filter the resulting items by some predicate. This also filters out
383    /// namespaces.
384    pub fn filter_items<F: Fn(&T) -> bool>(self, filter: F) -> Self {
385        self.filter(|x| {
386            x.1.as_item()
387                .map(|x| filter(x.as_ref()))
388                .unwrap_or_default()
389        })
390    }
391
392    /// Filter the resulting items by some predicate. This also filters out
393    /// namespaces.
394    pub fn filter_all_items(self) -> Self {
395        self.filter(|x| x.1.as_item().is_some())
396    }
397
398    /// Filter out anything that isn't a namespace.
399    pub fn filter_namespaces(self) -> Self {
400        self.filter(|x| x.1.as_namespace().is_some())
401    }
402
403    /// Helper function for expect_one() and expect_multiple().
404    fn expect<F1, F2>(
405        self,
406        parse_context: &mut context::Context,
407        if_not_applicable: F1,
408        if_ambiguous: F2,
409        allow_ambiguity: bool,
410        optional: bool,
411    ) -> Self
412    where
413        F1: FnOnce(String, &mut context::Context) -> bool,
414        F2: FnOnce(String, &mut context::Context) -> bool,
415    {
416        let mut visible = self.visible.iter();
417        if visible.next().is_some() {
418            if visible.next().is_some()
419                && !allow_ambiguity
420                && !if_ambiguous(self.unresolved_reference.to_string(), parse_context)
421            {
422                traversal::push_diagnostic(
423                    parse_context,
424                    diagnostic::Level::Error,
425                    cause!(
426                        LinkAmbiguousName,
427                        "multiple definitions are in scope for {}",
428                        self.unresolved_reference
429                    ),
430                )
431            }
432        } else if self.visible_incomplete || optional {
433            // A visible namespace was not resolved (in which case we
434            // optimistically assume the item exists) or the item doesn't need
435            // to exist.
436        } else if !self.invisible.is_empty() {
437            traversal::push_diagnostic(
438                parse_context,
439                diagnostic::Level::Error,
440                cause!(
441                    LinkUnresolvedName,
442                    "a definition for {} exists, but is not visible from here",
443                    self.unresolved_reference
444                ),
445            );
446        } else if self.invisible_incomplete {
447            traversal::push_diagnostic(
448                parse_context,
449                diagnostic::Level::Error,
450                cause!(
451                    LinkUnresolvedName,
452                    "a definition for {} may exist, but would not be visible from here",
453                    self.unresolved_reference
454                ),
455            );
456        } else if self.filtered {
457            if !if_not_applicable(self.unresolved_reference.to_string(), parse_context) {
458                traversal::push_diagnostic(
459                    parse_context,
460                    diagnostic::Level::Error,
461                    cause!(
462                        LinkUnresolvedName,
463                        "a definition for {} exists, but it cannot be used in this context",
464                        self.unresolved_reference
465                    ),
466                )
467            }
468        } else {
469            traversal::push_diagnostic(
470                parse_context,
471                diagnostic::Level::Error,
472                cause!(
473                    LinkUnresolvedName,
474                    "no definition found for {}",
475                    self.unresolved_reference
476                ),
477            );
478        }
479        self
480    }
481
482    /// Expects a single item, yielding diagnostics if this isn't the case. If
483    /// ambiguous or if all matching elements were not applicable (filtered
484    /// out), the specified functions are called. They receive the reference
485    /// name and the parse context as arguments to form a suitable diagnostic
486    /// message. If they return false, a default diagnostic message will be
487    /// emitted instead.
488    pub fn expect_one<F1, F2>(
489        self,
490        parse_context: &mut context::Context,
491        if_not_applicable: F1,
492        if_ambiguous: F2,
493    ) -> Self
494    where
495        F1: FnOnce(String, &mut context::Context) -> bool,
496        F2: FnOnce(String, &mut context::Context) -> bool,
497    {
498        self.expect(parse_context, if_not_applicable, if_ambiguous, false, false)
499    }
500
501    /// Expects zero or one item(s), yielding diagnostics if this isn't the
502    /// case. If ambiguous, the specified function is called. It receives the
503    /// reference name and the parse context as arguments to form a suitable
504    /// diagnostic message. If it returns false, a default diagnostic message
505    /// will be emitted instead.
506    pub fn expect_not_ambiguous<F>(
507        self,
508        parse_context: &mut context::Context,
509        if_ambiguous: F,
510    ) -> Self
511    where
512        F: FnOnce(String, &mut context::Context) -> bool,
513    {
514        self.expect(parse_context, |_, _| true, if_ambiguous, false, true)
515    }
516
517    /// Expects a one or more items, yielding diagnostics if this isn't the
518    /// case. If all matching elements were not applicable (filtered out), the
519    /// specified functions are called. They receive the reference name and the
520    /// parse context as arguments to form a suitable diagnostic message. If
521    /// they return false, a default diagnostic message will be emitted
522    /// instead.
523    pub fn expect_multiple<F>(
524        self,
525        parse_context: &mut context::Context,
526        if_not_applicable: F,
527    ) -> Self
528    where
529        F: FnOnce(String, &mut context::Context) -> bool,
530    {
531        self.expect(parse_context, if_not_applicable, |_, _| true, true, false)
532    }
533
534    /// Silently returns the first matching item, if any. If there are none,
535    /// this just returns an unresolved reference. Use
536    /// filter_items().expect_one() to formulate error messages if there are
537    /// multiple or no items available.
538    pub fn as_item(&self) -> extension::reference::Reference<T> {
539        let mut data = self.unresolved_reference.clone();
540        if let Some(item) = self.visible.iter().filter_map(|x| x.1.as_item()).next() {
541            data.definition.replace(item);
542        }
543        Arc::new(data)
544    }
545
546    /// Silently returns the first matching item, if any. Unlike as_item(),
547    /// this returns None if there are no matches.
548    pub fn as_opt_item(&self) -> Option<extension::reference::Reference<T>> {
549        self.visible
550            .iter()
551            .filter_map(|x| x.1.as_item())
552            .next()
553            .map(|item| {
554                let mut data = self.unresolved_reference.clone();
555                data.definition.replace(item);
556                Arc::new(data)
557            })
558    }
559
560    /// Silently returns the first matching namespace. Use
561    /// filter_namespaces().expect_one() to formulate error messages if there
562    /// are multiple or no namespaces available.
563    pub fn as_namespace(&self) -> Option<Reference<T>> {
564        self.visible
565            .iter()
566            .filter_map(|x| x.1.as_namespace())
567            .next()
568            .flatten()
569    }
570
571    /// Return an error if one or more definitions were found for this name
572    /// resolution, to be used just before defining a new item.
573    pub fn expect_not_yet_defined(&self) -> diagnostic::Result<()> {
574        if self.visible.is_empty() {
575            Ok(())
576        } else {
577            Err(cause!(
578                LinkDuplicateDefinition,
579                "{} is already defined",
580                self.unresolved_reference
581            ))
582        }
583    }
584}