substrait_validator/output/extension/namespace.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Module for representing namespaces used within extension definitions.
4
5use crate::output::diagnostic;
6use crate::output::extension;
7use crate::parse::context;
8use crate::parse::traversal;
9use std::collections::HashMap;
10use std::sync::Arc;
11
12/// Reference to a nested namespace structure.
13pub type Reference<T> = Arc<Definition<T>>;
14
15/// Nested namespace structure.
16///
17/// There are a lot of things "weird" about this structure, because Substrait
18/// is defined horribly; everything is a valid identifier (including
19/// identifiers with periods in them, so a period is ambiguous between a
20/// namespace separator and just part of an identifier), matching is
21/// case-insensitive (but for the validator we do want to store what the user's
22/// case convention was for a definition), and for type variations names
23/// need not even be unique (arguably there's a different variation namespace
24/// for each type class, but the type class isn't yet known when a type
25/// variation anchor is defined, and that's where we want to resolve the name;
26/// see <https://github.com/substrait-io/substrait/issues/268>).
27///
28/// So here we have an abomination of a namespace system that allows for name
29/// conflicts and period ambiguity and just tries to deal with it as cleanly as
30/// it can.
31#[derive(Debug)]
32pub struct Definition<T> {
33 /// Members of the namespace. The key is stored in lowercase.
34 members: HashMap<String, Vec<InternalReference<T>>>,
35}
36
37impl<T> Clone for Definition<T> {
38 fn clone(&self) -> Self {
39 Self {
40 members: self.members.clone(),
41 }
42 }
43}
44
45impl<T> Default for Definition<T> {
46 fn default() -> Self {
47 Self {
48 members: Default::default(),
49 }
50 }
51}
52
53impl<T> Definition<T> {
54 /// Creates a new, empty namespace.
55 pub fn new() -> Self {
56 Default::default()
57 }
58
59 /// Defines an item with the given (case-insensitive) name. If an item
60 /// going by a conflicting name already existed, the new item is inserted
61 /// anyway; if the caller is concerned about that, it must first resolve
62 /// the name and throw a warning or error based on its result.
63 pub fn define_item<S: AsRef<str>>(&mut self, name: S, item: Arc<T>, public: bool) {
64 let name = name.as_ref();
65 self.members
66 .entry(name.to_ascii_lowercase())
67 .or_default()
68 .push(InternalReference {
69 member: Arc::new(Member::Item(item)),
70 original_name: name.to_string(),
71 public,
72 })
73 }
74
75 /// Registers a nested namespace with the given (case-insensitive) name.
76 /// Same rules for duplicates as define_item(). If None is specified for
77 /// item, the namespace reference is registered, but its contents are
78 /// left unspecified.
79 pub fn define_nested<S: AsRef<str>>(
80 &mut self,
81 name: S,
82 item: Option<Reference<T>>,
83 public: bool,
84 ) {
85 let name = name.as_ref();
86 self.members
87 .entry(name.to_ascii_lowercase())
88 .or_default()
89 .push(InternalReference {
90 member: Arc::new(if let Some(item) = item {
91 Member::Nested(item)
92 } else {
93 Member::UnresolvedNested
94 }),
95 original_name: name.to_string(),
96 public,
97 })
98 }
99
100 /// Helper function that determines whether a reference is visible, based
101 /// on:
102 /// - whether this is a local lookup (private members visible) or an
103 /// external lookup (private members not visible);
104 /// - whether we had to traverse into namespaces in order to get to the
105 /// reference we're looking at;
106 /// - whether all of the above namespaces were public;
107 /// - whether the reference we're looking at is public.
108 fn visibility(
109 local: bool,
110 with_prefix: bool,
111 prefix_public: bool,
112 reference_public: bool,
113 ) -> bool {
114 // Clippy doesn't like this, but the logic is hard enough to understand
115 // without being buried in a big boolean expression.
116 #[allow(clippy::if_same_then_else)]
117 #[allow(clippy::needless_bool)]
118 if local && !with_prefix {
119 // Everything defined locally is visible.
120 true
121 } else if (prefix_public || !with_prefix) && reference_public {
122 // If all references in the path are public, the item is visible.
123 true
124 } else {
125 // Otherwise, the item is not visible.
126 false
127 }
128 }
129
130 /// Helper function for resolve. Resolves all references defined in the
131 /// current namespace and its children recursively.
132 ///
133 /// - target: the result object that matching elements get pushed into.
134 /// - local: whether this is a local lookup (i.e. private members of
135 /// the root namespace are visible).
136 /// - prefix: if specified, the concatenated original names of the
137 /// parent namespaces that were already recursed into.
138 /// - suffix: the name that we need to resolve locally.
139 /// - prefix_public: whether the prefix is public or private. Should be
140 /// true if there is no prefix.
141 fn resolve_internal(
142 &self,
143 target: &mut ResolutionResult<T>,
144 local: bool,
145 prefix: Option<&str>,
146 suffix: &str,
147 prefix_public: bool,
148 ) {
149 // Resolve locally-defined elements.
150 if let Some(references) = self.members.get(&suffix.to_ascii_lowercase()) {
151 for reference in references {
152 let original_name = if let Some(prefix) = prefix {
153 format!("{prefix}.{}", reference.original_name)
154 } else {
155 reference.original_name.clone()
156 };
157 let visible =
158 Self::visibility(local, prefix.is_some(), prefix_public, reference.public);
159 let target_vec = if visible {
160 &mut target.visible
161 } else {
162 &mut target.invisible
163 };
164 target_vec.push((original_name, reference.member.clone()))
165 }
166 }
167
168 // If the suffix has periods in it, try splitting at every period it
169 // has and seeing if that prefix resolves to any nested namespace.
170 for (split_point, _) in suffix.char_indices().filter(|(_, c)| *c == '.') {
171 let namespace_name = &suffix[..split_point];
172 let new_suffix = &suffix[split_point + 1..];
173 if let Some(references) = self.members.get(&namespace_name.to_ascii_lowercase()) {
174 for reference in references {
175 match reference.member.as_ref() {
176 Member::Item(_) => (),
177 Member::Nested(nested) => {
178 let new_prefix = if let Some(prefix) = prefix {
179 format!("{prefix}.{}", reference.original_name)
180 } else {
181 reference.original_name.clone()
182 };
183 nested.resolve_internal(
184 target,
185 local,
186 Some(&new_prefix),
187 new_suffix,
188 prefix_public && reference.public,
189 );
190 }
191 Member::UnresolvedNested => {
192 let visible = Self::visibility(
193 local,
194 prefix.is_some(),
195 prefix_public,
196 reference.public,
197 );
198 if visible {
199 target.visible_incomplete = true;
200 } else {
201 target.invisible_incomplete = true;
202 };
203 }
204 }
205 }
206 }
207 }
208 }
209
210 /// Resolves a name to all items with the same name visible from within this
211 /// namespace (so, including private items).
212 pub fn resolve_local<R>(&self, reference: R) -> ResolutionResult<T>
213 where
214 R: Into<extension::reference::Data<T>>,
215 {
216 let reference = reference.into();
217 let name = reference.name.name().unwrap_or("!").to_string();
218 let mut result = ResolutionResult::new(reference);
219 self.resolve_internal(&mut result, true, None, &name, true);
220 result
221 }
222
223 /// Resolves a name to all items with the same name visible from outside
224 /// this namespace (so, excluding private items).
225 pub fn resolve_public<R>(&self, reference: R) -> ResolutionResult<T>
226 where
227 R: Into<extension::reference::Data<T>>,
228 {
229 let reference = reference.into();
230 let name = reference.name.name().unwrap_or("!").to_string();
231 let mut result = ResolutionResult::new(reference);
232 self.resolve_internal(&mut result, false, None, &name, true);
233 result
234 }
235}
236
237/// Reference to a namespace member.
238#[derive(Debug)]
239struct InternalReference<T> {
240 /// The member that is being referred to.
241 pub member: Arc<Member<T>>,
242
243 /// The name of the member using its original case convention.
244 pub original_name: String,
245
246 /// Whether this member has public visibility.
247 pub public: bool,
248}
249
250impl<T> Clone for InternalReference<T> {
251 fn clone(&self) -> Self {
252 Self {
253 member: self.member.clone(),
254 original_name: self.original_name.clone(),
255 public: self.public,
256 }
257 }
258}
259
260/// A namespace member.
261#[derive(Debug)]
262enum Member<T> {
263 /// A leaf item.
264 Item(Arc<T>),
265
266 /// A nested namespace.
267 Nested(Reference<T>),
268
269 /// A nested namespace of which the contents were not resolved.
270 UnresolvedNested,
271}
272
273impl<T> Clone for Member<T> {
274 fn clone(&self) -> Self {
275 match self {
276 Self::Item(x) => Self::Item(x.clone()),
277 Self::Nested(x) => Self::Nested(x.clone()),
278 Self::UnresolvedNested => Self::UnresolvedNested,
279 }
280 }
281}
282
283impl<T> Member<T> {
284 /// Returns the embedded item if this is an item.
285 pub fn as_item(&self) -> Option<Arc<T>> {
286 match self {
287 Member::Item(x) => Some(x.clone()),
288 _ => None,
289 }
290 }
291
292 /// Returns the embedded potentially-unresolved namespace if this is a
293 /// namespace.
294 pub fn as_namespace(&self) -> Option<Option<Reference<T>>> {
295 match self {
296 Member::Nested(x) => Some(Some(x.clone())),
297 Member::UnresolvedNested => Some(None),
298 _ => None,
299 }
300 }
301}
302
303/// Result structure for a namespace resolution.
304#[derive(Clone, Debug)]
305pub struct ResolutionResult<T> {
306 /// The reference that was being resolved.
307 unresolved_reference: extension::reference::Data<T>,
308
309 /// Members that are visible from the point where the resolution was
310 /// performed.
311 visible: Vec<(String, Arc<Member<T>>)>,
312
313 /// Whether there might be more visible members, defined within
314 /// unresolved namespaces.
315 visible_incomplete: bool,
316
317 /// Members that are not visible from the point where the resolution was
318 /// performed. These are enumerated nonetheless to improve the quality of
319 /// error messages.
320 invisible: Vec<(String, Arc<Member<T>>)>,
321
322 /// Whether there might be more invisible members, defined within
323 /// unresolved namespaces.
324 invisible_incomplete: bool,
325
326 /// Whether a filter has been applied and visible items were removed by
327 /// it.
328 filtered: bool,
329}
330
331impl<T> std::fmt::Display for ResolutionResult<T> {
332 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
333 write!(f, "{}", self.unresolved_reference)?;
334 if self.visible_incomplete {
335 match self.visible.len() {
336 0 => write!(
337 f,
338 " (no known matching definitions, namespace not resolved)"
339 ),
340 1 => write!(f, " (one or more matching definitions)"),
341 n => write!(f, " ({n} or more matching definitions)"),
342 }
343 } else {
344 match self.visible.len() {
345 0 => write!(f, " (no matching definitions)"),
346 1 => write!(f, " (one matching definition)"),
347 n => write!(f, " ({n} matching definitions)"),
348 }
349 }
350 }
351}
352
353impl<T> ResolutionResult<T> {
354 /// Creates a new, empty resolution result for the given unresolved
355 /// reference, to be used as a placeholder when no namespace is actually
356 /// available. It will behave as if resolution failed because the namespace
357 /// being looked in was itself not resolved. If an item is passed via the
358 /// reference, it will be returned by as_item(). as_namespace() will return
359 /// None.
360 pub fn new(unresolved_reference: extension::reference::Data<T>) -> Self {
361 Self {
362 unresolved_reference,
363 visible: vec![],
364 visible_incomplete: true,
365 invisible: vec![],
366 invisible_incomplete: false,
367 filtered: false,
368 }
369 }
370
371 /// Internal helper for the various filter functions.
372 fn filter<F: Fn(&(String, Arc<Member<T>>)) -> bool>(mut self, filter: F) -> Self {
373 let size = self.visible.len();
374 self.visible.retain(&filter);
375 self.invisible.retain(&filter);
376 if self.visible.len() < size {
377 self.filtered = true;
378 }
379 self
380 }
381
382 /// Filter the resulting items by some predicate. This also filters out
383 /// namespaces.
384 pub fn filter_items<F: Fn(&T) -> bool>(self, filter: F) -> Self {
385 self.filter(|x| {
386 x.1.as_item()
387 .map(|x| filter(x.as_ref()))
388 .unwrap_or_default()
389 })
390 }
391
392 /// Filter the resulting items by some predicate. This also filters out
393 /// namespaces.
394 pub fn filter_all_items(self) -> Self {
395 self.filter(|x| x.1.as_item().is_some())
396 }
397
398 /// Filter out anything that isn't a namespace.
399 pub fn filter_namespaces(self) -> Self {
400 self.filter(|x| x.1.as_namespace().is_some())
401 }
402
403 /// Helper function for expect_one() and expect_multiple().
404 fn expect<F1, F2>(
405 self,
406 parse_context: &mut context::Context,
407 if_not_applicable: F1,
408 if_ambiguous: F2,
409 allow_ambiguity: bool,
410 optional: bool,
411 ) -> Self
412 where
413 F1: FnOnce(String, &mut context::Context) -> bool,
414 F2: FnOnce(String, &mut context::Context) -> bool,
415 {
416 let mut visible = self.visible.iter();
417 if visible.next().is_some() {
418 if visible.next().is_some()
419 && !allow_ambiguity
420 && !if_ambiguous(self.unresolved_reference.to_string(), parse_context)
421 {
422 traversal::push_diagnostic(
423 parse_context,
424 diagnostic::Level::Error,
425 cause!(
426 LinkAmbiguousName,
427 "multiple definitions are in scope for {}",
428 self.unresolved_reference
429 ),
430 )
431 }
432 } else if self.visible_incomplete || optional {
433 // A visible namespace was not resolved (in which case we
434 // optimistically assume the item exists) or the item doesn't need
435 // to exist.
436 } else if !self.invisible.is_empty() {
437 traversal::push_diagnostic(
438 parse_context,
439 diagnostic::Level::Error,
440 cause!(
441 LinkUnresolvedName,
442 "a definition for {} exists, but is not visible from here",
443 self.unresolved_reference
444 ),
445 );
446 } else if self.invisible_incomplete {
447 traversal::push_diagnostic(
448 parse_context,
449 diagnostic::Level::Error,
450 cause!(
451 LinkUnresolvedName,
452 "a definition for {} may exist, but would not be visible from here",
453 self.unresolved_reference
454 ),
455 );
456 } else if self.filtered {
457 if !if_not_applicable(self.unresolved_reference.to_string(), parse_context) {
458 traversal::push_diagnostic(
459 parse_context,
460 diagnostic::Level::Error,
461 cause!(
462 LinkUnresolvedName,
463 "a definition for {} exists, but it cannot be used in this context",
464 self.unresolved_reference
465 ),
466 )
467 }
468 } else {
469 traversal::push_diagnostic(
470 parse_context,
471 diagnostic::Level::Error,
472 cause!(
473 LinkUnresolvedName,
474 "no definition found for {}",
475 self.unresolved_reference
476 ),
477 );
478 }
479 self
480 }
481
482 /// Expects a single item, yielding diagnostics if this isn't the case. If
483 /// ambiguous or if all matching elements were not applicable (filtered
484 /// out), the specified functions are called. They receive the reference
485 /// name and the parse context as arguments to form a suitable diagnostic
486 /// message. If they return false, a default diagnostic message will be
487 /// emitted instead.
488 pub fn expect_one<F1, F2>(
489 self,
490 parse_context: &mut context::Context,
491 if_not_applicable: F1,
492 if_ambiguous: F2,
493 ) -> Self
494 where
495 F1: FnOnce(String, &mut context::Context) -> bool,
496 F2: FnOnce(String, &mut context::Context) -> bool,
497 {
498 self.expect(parse_context, if_not_applicable, if_ambiguous, false, false)
499 }
500
501 /// Expects zero or one item(s), yielding diagnostics if this isn't the
502 /// case. If ambiguous, the specified function is called. It receives the
503 /// reference name and the parse context as arguments to form a suitable
504 /// diagnostic message. If it returns false, a default diagnostic message
505 /// will be emitted instead.
506 pub fn expect_not_ambiguous<F>(
507 self,
508 parse_context: &mut context::Context,
509 if_ambiguous: F,
510 ) -> Self
511 where
512 F: FnOnce(String, &mut context::Context) -> bool,
513 {
514 self.expect(parse_context, |_, _| true, if_ambiguous, false, true)
515 }
516
517 /// Expects a one or more items, yielding diagnostics if this isn't the
518 /// case. If all matching elements were not applicable (filtered out), the
519 /// specified functions are called. They receive the reference name and the
520 /// parse context as arguments to form a suitable diagnostic message. If
521 /// they return false, a default diagnostic message will be emitted
522 /// instead.
523 pub fn expect_multiple<F>(
524 self,
525 parse_context: &mut context::Context,
526 if_not_applicable: F,
527 ) -> Self
528 where
529 F: FnOnce(String, &mut context::Context) -> bool,
530 {
531 self.expect(parse_context, if_not_applicable, |_, _| true, true, false)
532 }
533
534 /// Silently returns the first matching item, if any. If there are none,
535 /// this just returns an unresolved reference. Use
536 /// filter_items().expect_one() to formulate error messages if there are
537 /// multiple or no items available.
538 pub fn as_item(&self) -> extension::reference::Reference<T> {
539 let mut data = self.unresolved_reference.clone();
540 if let Some(item) = self.visible.iter().filter_map(|x| x.1.as_item()).next() {
541 data.definition.replace(item);
542 }
543 Arc::new(data)
544 }
545
546 /// Silently returns the first matching item, if any. Unlike as_item(),
547 /// this returns None if there are no matches.
548 pub fn as_opt_item(&self) -> Option<extension::reference::Reference<T>> {
549 self.visible
550 .iter()
551 .filter_map(|x| x.1.as_item())
552 .next()
553 .map(|item| {
554 let mut data = self.unresolved_reference.clone();
555 data.definition.replace(item);
556 Arc::new(data)
557 })
558 }
559
560 /// Silently returns the first matching namespace. Use
561 /// filter_namespaces().expect_one() to formulate error messages if there
562 /// are multiple or no namespaces available.
563 pub fn as_namespace(&self) -> Option<Reference<T>> {
564 self.visible
565 .iter()
566 .filter_map(|x| x.1.as_namespace())
567 .next()
568 .flatten()
569 }
570
571 /// Return an error if one or more definitions were found for this name
572 /// resolution, to be used just before defining a new item.
573 pub fn expect_not_yet_defined(&self) -> diagnostic::Result<()> {
574 if self.visible.is_empty() {
575 Ok(())
576 } else {
577 Err(cause!(
578 LinkDuplicateDefinition,
579 "{} is already defined",
580 self.unresolved_reference
581 ))
582 }
583 }
584}