substrait-validator 0.1.4

Substrait validator
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
// SPDX-License-Identifier: Apache-2.0

//! Module for representing namespaces used within extension definitions.

use crate::output::diagnostic;
use crate::output::extension;
use crate::parse::context;
use crate::parse::traversal;
use std::collections::HashMap;
use std::sync::Arc;

/// Reference to a nested namespace structure.
pub type Reference<T> = Arc<Definition<T>>;

/// Nested namespace structure.
///
/// There are a lot of things "weird" about this structure, because Substrait
/// is defined horribly; everything is a valid identifier (including
/// identifiers with periods in them, so a period is ambiguous between a
/// namespace separator and just part of an identifier), matching is
/// case-insensitive (but for the validator we do want to store what the user's
/// case convention was for a definition), and for type variations names
/// need not even be unique (arguably there's a different variation namespace
/// for each type class, but the type class isn't yet known when a type
/// variation anchor is defined, and that's where we want to resolve the name;
/// see <https://github.com/substrait-io/substrait/issues/268>).
///
/// So here we have an abomination of a namespace system that allows for name
/// conflicts and period ambiguity and just tries to deal with it as cleanly as
/// it can.
#[derive(Debug)]
pub struct Definition<T> {
    /// Members of the namespace. The key is stored in lowercase.
    members: HashMap<String, Vec<InternalReference<T>>>,
}

impl<T> Clone for Definition<T> {
    fn clone(&self) -> Self {
        Self {
            members: self.members.clone(),
        }
    }
}

impl<T> Default for Definition<T> {
    fn default() -> Self {
        Self {
            members: Default::default(),
        }
    }
}

impl<T> Definition<T> {
    /// Creates a new, empty namespace.
    pub fn new() -> Self {
        Default::default()
    }

    /// Defines an item with the given (case-insensitive) name. If an item
    /// going by a conflicting name already existed, the new item is inserted
    /// anyway; if the caller is concerned about that, it must first resolve
    /// the name and throw a warning or error based on its result.
    pub fn define_item<S: AsRef<str>>(&mut self, name: S, item: Arc<T>, public: bool) {
        let name = name.as_ref();
        self.members
            .entry(name.to_ascii_lowercase())
            .or_default()
            .push(InternalReference {
                member: Arc::new(Member::Item(item)),
                original_name: name.to_string(),
                public,
            })
    }

    /// Registers a nested namespace with the given (case-insensitive) name.
    /// Same rules for duplicates as define_item(). If None is specified for
    /// item, the namespace reference is registered, but its contents are
    /// left unspecified.
    pub fn define_nested<S: AsRef<str>>(
        &mut self,
        name: S,
        item: Option<Reference<T>>,
        public: bool,
    ) {
        let name = name.as_ref();
        self.members
            .entry(name.to_ascii_lowercase())
            .or_default()
            .push(InternalReference {
                member: Arc::new(if let Some(item) = item {
                    Member::Nested(item)
                } else {
                    Member::UnresolvedNested
                }),
                original_name: name.to_string(),
                public,
            })
    }

    /// Helper function that determines whether a reference is visible, based
    /// on:
    ///  - whether this is a local lookup (private members visible) or an
    ///    external lookup (private members not visible);
    ///  - whether we had to traverse into namespaces in order to get to the
    ///    reference we're looking at;
    ///  - whether all of the above namespaces were public;
    ///  - whether the reference we're looking at is public.
    fn visibility(
        local: bool,
        with_prefix: bool,
        prefix_public: bool,
        reference_public: bool,
    ) -> bool {
        // Clippy doesn't like this, but the logic is hard enough to understand
        // without being buried in a big boolean expression.
        #[allow(clippy::if_same_then_else)]
        #[allow(clippy::needless_bool)]
        if local && !with_prefix {
            // Everything defined locally is visible.
            true
        } else if (prefix_public || !with_prefix) && reference_public {
            // If all references in the path are public, the item is visible.
            true
        } else {
            // Otherwise, the item is not visible.
            false
        }
    }

    /// Helper function for resolve. Resolves all references defined in the
    /// current namespace and its children recursively.
    ///
    ///  - target: the result object that matching elements get pushed into.
    ///  - local: whether this is a local lookup (i.e. private members of
    ///    the root namespace are visible).
    ///  - prefix: if specified, the concatenated original names of the
    ///    parent namespaces that were already recursed into.
    ///  - suffix: the name that we need to resolve locally.
    ///  - prefix_public: whether the prefix is public or private. Should be
    ///    true if there is no prefix.
    fn resolve_internal(
        &self,
        target: &mut ResolutionResult<T>,
        local: bool,
        prefix: Option<&str>,
        suffix: &str,
        prefix_public: bool,
    ) {
        // Resolve locally-defined elements.
        if let Some(references) = self.members.get(&suffix.to_ascii_lowercase()) {
            for reference in references {
                let original_name = if let Some(prefix) = prefix {
                    format!("{prefix}.{}", reference.original_name)
                } else {
                    reference.original_name.clone()
                };
                let visible =
                    Self::visibility(local, prefix.is_some(), prefix_public, reference.public);
                let target_vec = if visible {
                    &mut target.visible
                } else {
                    &mut target.invisible
                };
                target_vec.push((original_name, reference.member.clone()))
            }
        }

        // If the suffix has periods in it, try splitting at every period it
        // has and seeing if that prefix resolves to any nested namespace.
        for (split_point, _) in suffix.char_indices().filter(|(_, c)| *c == '.') {
            let namespace_name = &suffix[..split_point];
            let new_suffix = &suffix[split_point + 1..];
            if let Some(references) = self.members.get(&namespace_name.to_ascii_lowercase()) {
                for reference in references {
                    match reference.member.as_ref() {
                        Member::Item(_) => (),
                        Member::Nested(nested) => {
                            let new_prefix = if let Some(prefix) = prefix {
                                format!("{prefix}.{}", reference.original_name)
                            } else {
                                reference.original_name.clone()
                            };
                            nested.resolve_internal(
                                target,
                                local,
                                Some(&new_prefix),
                                new_suffix,
                                prefix_public && reference.public,
                            );
                        }
                        Member::UnresolvedNested => {
                            let visible = Self::visibility(
                                local,
                                prefix.is_some(),
                                prefix_public,
                                reference.public,
                            );
                            if visible {
                                target.visible_incomplete = true;
                            } else {
                                target.invisible_incomplete = true;
                            };
                        }
                    }
                }
            }
        }
    }

    /// Resolves a name to all items with the same name visible from within this
    /// namespace (so, including private items).
    pub fn resolve_local<R>(&self, reference: R) -> ResolutionResult<T>
    where
        R: Into<extension::reference::Data<T>>,
    {
        let reference = reference.into();
        let name = reference.name.name().unwrap_or("!").to_string();
        let mut result = ResolutionResult::new(reference);
        self.resolve_internal(&mut result, true, None, &name, true);
        result
    }

    /// Resolves a name to all items with the same name visible from outside
    /// this namespace (so, excluding private items).
    pub fn resolve_public<R>(&self, reference: R) -> ResolutionResult<T>
    where
        R: Into<extension::reference::Data<T>>,
    {
        let reference = reference.into();
        let name = reference.name.name().unwrap_or("!").to_string();
        let mut result = ResolutionResult::new(reference);
        self.resolve_internal(&mut result, false, None, &name, true);
        result
    }
}

/// Reference to a namespace member.
#[derive(Debug)]
struct InternalReference<T> {
    /// The member that is being referred to.
    pub member: Arc<Member<T>>,

    /// The name of the member using its original case convention.
    pub original_name: String,

    /// Whether this member has public visibility.
    pub public: bool,
}

impl<T> Clone for InternalReference<T> {
    fn clone(&self) -> Self {
        Self {
            member: self.member.clone(),
            original_name: self.original_name.clone(),
            public: self.public,
        }
    }
}

/// A namespace member.
#[derive(Debug)]
enum Member<T> {
    /// A leaf item.
    Item(Arc<T>),

    /// A nested namespace.
    Nested(Reference<T>),

    /// A nested namespace of which the contents were not resolved.
    UnresolvedNested,
}

impl<T> Clone for Member<T> {
    fn clone(&self) -> Self {
        match self {
            Self::Item(x) => Self::Item(x.clone()),
            Self::Nested(x) => Self::Nested(x.clone()),
            Self::UnresolvedNested => Self::UnresolvedNested,
        }
    }
}

impl<T> Member<T> {
    /// Returns the embedded item if this is an item.
    pub fn as_item(&self) -> Option<Arc<T>> {
        match self {
            Member::Item(x) => Some(x.clone()),
            _ => None,
        }
    }

    /// Returns the embedded potentially-unresolved namespace if this is a
    /// namespace.
    pub fn as_namespace(&self) -> Option<Option<Reference<T>>> {
        match self {
            Member::Nested(x) => Some(Some(x.clone())),
            Member::UnresolvedNested => Some(None),
            _ => None,
        }
    }
}

/// Result structure for a namespace resolution.
#[derive(Clone, Debug)]
pub struct ResolutionResult<T> {
    /// The reference that was being resolved.
    unresolved_reference: extension::reference::Data<T>,

    /// Members that are visible from the point where the resolution was
    /// performed.
    visible: Vec<(String, Arc<Member<T>>)>,

    /// Whether there might be more visible members, defined within
    /// unresolved namespaces.
    visible_incomplete: bool,

    /// Members that are not visible from the point where the resolution was
    /// performed. These are enumerated nonetheless to improve the quality of
    /// error messages.
    invisible: Vec<(String, Arc<Member<T>>)>,

    /// Whether there might be more invisible members, defined within
    /// unresolved namespaces.
    invisible_incomplete: bool,

    /// Whether a filter has been applied and visible items were removed by
    /// it.
    filtered: bool,
}

impl<T> std::fmt::Display for ResolutionResult<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.unresolved_reference)?;
        if self.visible_incomplete {
            match self.visible.len() {
                0 => write!(
                    f,
                    " (no known matching definitions, namespace not resolved)"
                ),
                1 => write!(f, " (one or more matching definitions)"),
                n => write!(f, " ({n} or more matching definitions)"),
            }
        } else {
            match self.visible.len() {
                0 => write!(f, " (no matching definitions)"),
                1 => write!(f, " (one matching definition)"),
                n => write!(f, " ({n} matching definitions)"),
            }
        }
    }
}

impl<T> ResolutionResult<T> {
    /// Creates a new, empty resolution result for the given unresolved
    /// reference, to be used as a placeholder when no namespace is actually
    /// available. It will behave as if resolution failed because the namespace
    /// being looked in was itself not resolved. If an item is passed via the
    /// reference, it will be returned by as_item(). as_namespace() will return
    /// None.
    pub fn new(unresolved_reference: extension::reference::Data<T>) -> Self {
        Self {
            unresolved_reference,
            visible: vec![],
            visible_incomplete: true,
            invisible: vec![],
            invisible_incomplete: false,
            filtered: false,
        }
    }

    /// Internal helper for the various filter functions.
    fn filter<F: Fn(&(String, Arc<Member<T>>)) -> bool>(mut self, filter: F) -> Self {
        let size = self.visible.len();
        self.visible.retain(&filter);
        self.invisible.retain(&filter);
        if self.visible.len() < size {
            self.filtered = true;
        }
        self
    }

    /// Filter the resulting items by some predicate. This also filters out
    /// namespaces.
    pub fn filter_items<F: Fn(&T) -> bool>(self, filter: F) -> Self {
        self.filter(|x| {
            x.1.as_item()
                .map(|x| filter(x.as_ref()))
                .unwrap_or_default()
        })
    }

    /// Filter the resulting items by some predicate. This also filters out
    /// namespaces.
    pub fn filter_all_items(self) -> Self {
        self.filter(|x| x.1.as_item().is_some())
    }

    /// Filter out anything that isn't a namespace.
    pub fn filter_namespaces(self) -> Self {
        self.filter(|x| x.1.as_namespace().is_some())
    }

    /// Helper function for expect_one() and expect_multiple().
    fn expect<F1, F2>(
        self,
        parse_context: &mut context::Context,
        if_not_applicable: F1,
        if_ambiguous: F2,
        allow_ambiguity: bool,
        optional: bool,
    ) -> Self
    where
        F1: FnOnce(String, &mut context::Context) -> bool,
        F2: FnOnce(String, &mut context::Context) -> bool,
    {
        let mut visible = self.visible.iter();
        if visible.next().is_some() {
            if visible.next().is_some()
                && !allow_ambiguity
                && !if_ambiguous(self.unresolved_reference.to_string(), parse_context)
            {
                traversal::push_diagnostic(
                    parse_context,
                    diagnostic::Level::Error,
                    cause!(
                        LinkAmbiguousName,
                        "multiple definitions are in scope for {}",
                        self.unresolved_reference
                    ),
                )
            }
        } else if self.visible_incomplete || optional {
            // A visible namespace was not resolved (in which case we
            // optimistically assume the item exists) or the item doesn't need
            // to exist.
        } else if !self.invisible.is_empty() {
            traversal::push_diagnostic(
                parse_context,
                diagnostic::Level::Error,
                cause!(
                    LinkUnresolvedName,
                    "a definition for {} exists, but is not visible from here",
                    self.unresolved_reference
                ),
            );
        } else if self.invisible_incomplete {
            traversal::push_diagnostic(
                parse_context,
                diagnostic::Level::Error,
                cause!(
                    LinkUnresolvedName,
                    "a definition for {} may exist, but would not be visible from here",
                    self.unresolved_reference
                ),
            );
        } else if self.filtered {
            if !if_not_applicable(self.unresolved_reference.to_string(), parse_context) {
                traversal::push_diagnostic(
                    parse_context,
                    diagnostic::Level::Error,
                    cause!(
                        LinkUnresolvedName,
                        "a definition for {} exists, but it cannot be used in this context",
                        self.unresolved_reference
                    ),
                )
            }
        } else {
            traversal::push_diagnostic(
                parse_context,
                diagnostic::Level::Error,
                cause!(
                    LinkUnresolvedName,
                    "no definition found for {}",
                    self.unresolved_reference
                ),
            );
        }
        self
    }

    /// Expects a single item, yielding diagnostics if this isn't the case. If
    /// ambiguous or if all matching elements were not applicable (filtered
    /// out), the specified functions are called. They receive the reference
    /// name and the parse context as arguments to form a suitable diagnostic
    /// message. If they return false, a default diagnostic message will be
    /// emitted instead.
    pub fn expect_one<F1, F2>(
        self,
        parse_context: &mut context::Context,
        if_not_applicable: F1,
        if_ambiguous: F2,
    ) -> Self
    where
        F1: FnOnce(String, &mut context::Context) -> bool,
        F2: FnOnce(String, &mut context::Context) -> bool,
    {
        self.expect(parse_context, if_not_applicable, if_ambiguous, false, false)
    }

    /// Expects zero or one item(s), yielding diagnostics if this isn't the
    /// case. If ambiguous, the specified function is called. It receives the
    /// reference name and the parse context as arguments to form a suitable
    /// diagnostic message. If it returns false, a default diagnostic message
    /// will be emitted instead.
    pub fn expect_not_ambiguous<F>(
        self,
        parse_context: &mut context::Context,
        if_ambiguous: F,
    ) -> Self
    where
        F: FnOnce(String, &mut context::Context) -> bool,
    {
        self.expect(parse_context, |_, _| true, if_ambiguous, false, true)
    }

    /// Expects a one or more items, yielding diagnostics if this isn't the
    /// case. If all matching elements were not applicable (filtered out), the
    /// specified functions are called. They receive the reference name and the
    /// parse context as arguments to form a suitable diagnostic message. If
    /// they return false, a default diagnostic message will be emitted
    /// instead.
    pub fn expect_multiple<F>(
        self,
        parse_context: &mut context::Context,
        if_not_applicable: F,
    ) -> Self
    where
        F: FnOnce(String, &mut context::Context) -> bool,
    {
        self.expect(parse_context, if_not_applicable, |_, _| true, true, false)
    }

    /// Silently returns the first matching item, if any. If there are none,
    /// this just returns an unresolved reference. Use
    /// filter_items().expect_one() to formulate error messages if there are
    /// multiple or no items available.
    pub fn as_item(&self) -> extension::reference::Reference<T> {
        let mut data = self.unresolved_reference.clone();
        if let Some(item) = self.visible.iter().filter_map(|x| x.1.as_item()).next() {
            data.definition.replace(item);
        }
        Arc::new(data)
    }

    /// Silently returns the first matching item, if any. Unlike as_item(),
    /// this returns None if there are no matches.
    pub fn as_opt_item(&self) -> Option<extension::reference::Reference<T>> {
        self.visible
            .iter()
            .filter_map(|x| x.1.as_item())
            .next()
            .map(|item| {
                let mut data = self.unresolved_reference.clone();
                data.definition.replace(item);
                Arc::new(data)
            })
    }

    /// Silently returns the first matching namespace. Use
    /// filter_namespaces().expect_one() to formulate error messages if there
    /// are multiple or no namespaces available.
    pub fn as_namespace(&self) -> Option<Reference<T>> {
        self.visible
            .iter()
            .filter_map(|x| x.1.as_namespace())
            .next()
            .flatten()
    }

    /// Return an error if one or more definitions were found for this name
    /// resolution, to be used just before defining a new item.
    pub fn expect_not_yet_defined(&self) -> diagnostic::Result<()> {
        if self.visible.is_empty() {
            Ok(())
        } else {
            Err(cause!(
                LinkDuplicateDefinition,
                "{} is already defined",
                self.unresolved_reference
            ))
        }
    }
}