substrait_validator/output/
diagnostic.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Module for diagnostic message types.
4//!
5//! Since diagnostic messages are rather important for a validator (after all,
6//! getting a diagnostic message is hardly an exceptional case), they have
7//! quite a bit of metadata attached to them. Ultimately, the diagnostic
8//! messages attached to the tree ([`Diagnostic`]) have the following
9//! parameters:
10//!
11//!  - cause.message: an enumeration of various types of error messages, in
12//!    the usual Rust way. Messages generated by this crate are usually
13//!    untyped (they just use String), but error information from other
14//!    crates is retained as much as possible.
15//!  - cause.classification: an enumeration of various bits of the validation
16//!    process where diagnostics might occur. Each [`Classification`] enum
17//!    variant can be converted to a unique number, known as the diagnostic
18//!    code, which the user of the crate may use to easily programmatically
19//!    determine what caused a diagnostic in a language-agnostic way. The user
20//!    may also configure the validator in advance to promote or reduce the
21//!    severity of diagnostics, indexed by their code. The codes are
22//!    furthermore organized into groups, with up to 999 classes per group: the
23//!    thousands digit and up is the group identifier, and the less-significant
24//!    digits form the sub-code. Sub-code 0 is reserved to refer to the group
25//!    as a whole.
26//!  - original_level: the error [`Level`] that the validation code assigned to
27//!    the message. This can be `Error`, `Warning`, or `Info`, which correspond
28//!    directly to "this is definitely wrong," "this may or may not be wrong,"
29//!    and "this conforms to the Substrait specification, but it's worth noting
30//!    anyway" respectively.
31//!  - adjusted_level: the error [`Level`] after configuration-based adjustment.
32//!    This level is what's used by the high-level APIs to determine the
33//!    validity of a plan. Thus, a user can choose to ignore a particular error
34//!    if their consumer implementation can deal with it anyway, or they can
35//!    assert whether a particular type of warning is actually an error or not.
36//!  - path: a path into the substrait.Plan message. This is *usually* just a
37//!    copy of the path to the node that was being validated when the
38//!    diagnostic was created, but in some cases diagnostics may be placed in a
39//!    parent node (for instance to refer to a node that should exist but
40//!    doesn't), or refer to a different location altogether (for instance to
41//!    point the user to the previous definition in a note following a
42//!    duplicate definition error).
43
44use crate::output::path;
45use num_traits::cast::FromPrimitive;
46use std::sync::Arc;
47use strum::EnumProperty;
48
/// Owned variant of jsonschema::error::ValidationError<'a>. Instead of a
/// reference to the YAML tree node that caused the error, this just contains
/// the formatted error message. The validation error kind and paths are
/// however retained.
#[derive(Debug, thiserror::Error)]
pub struct JsonSchemaValidationError {
    /// The original validation error rendered to text. This is also what
    /// this type prints when displayed.
    pub message: String,
    /// Kind of validation failure, taken over from the original error.
    pub kind: jsonschema::error::ValidationErrorKind,
    /// Instance path, taken over from the original error.
    pub instance_path: jsonschema::paths::JSONPointer,
    /// Schema path, taken over from the original error.
    pub schema_path: jsonschema::paths::JSONPointer,
}
60
61impl std::fmt::Display for JsonSchemaValidationError {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        self.message.fmt(f)
64    }
65}
66
67impl From<jsonschema::error::ValidationError<'_>> for JsonSchemaValidationError {
68    fn from(v: jsonschema::error::ValidationError) -> Self {
69        JsonSchemaValidationError {
70            message: v.to_string(),
71            kind: v.kind,
72            instance_path: v.instance_path,
73            schema_path: v.schema_path,
74        }
75    }
76}
77
/// Enumeration for error message data we might encounter.
///
/// Every variant displays as its payload (`{0}`). All variants except
/// `Untyped` wrap an error type and are marked `#[from]`, so those error
/// types convert into a `Message` directly.
#[derive(Debug, thiserror::Error)]
pub enum Message {
    /// Free-form message text.
    #[error("{0}")]
    Untyped(String),

    /// Error reported by prost while decoding.
    #[error("{0}")]
    ProstDecodeError(#[from] prost::DecodeError),

    /// Standard library I/O error.
    #[error("{0}")]
    IoError(#[from] std::io::Error),

    /// Standard library UTF-8 validation error.
    #[error("{0}")]
    UtfError(#[from] std::str::Utf8Error),

    /// Error reported by serde_yaml.
    #[error("{0}")]
    YamlError(#[from] serde_yaml::Error),

    /// Owned JSON schema validation error.
    #[error("{0}")]
    JsonSchemaValidationError(#[from] JsonSchemaValidationError),

    /// Error reported by uriparse for a URI reference.
    #[error("{0}")]
    UriError(#[from] uriparse::URIReferenceError),

    /// Error reported by glob for a pattern.
    #[error("{0}")]
    GlobError(#[from] glob::PatternError),
}
105
106impl From<&str> for Message {
107    fn from(s: &str) -> Self {
108        Message::Untyped(s.to_string())
109    }
110}
111
112impl From<String> for Message {
113    fn from(s: String) -> Self {
114        Message::Untyped(s)
115    }
116}
117
118impl From<jsonschema::error::ValidationError<'_>> for Message {
119    fn from(v: jsonschema::error::ValidationError<'_>) -> Self {
120        JsonSchemaValidationError::from(v).into()
121    }
122}
123
/// Enumeration for the particular types of diagnostics we might encounter.
///
/// Numbers must be assigned as follows:
///  - the group identifier is represented by the thousands digit and up;
///  - the first classification for each group (i.e. divisible by 1000) is
///    reserved for diagnostics that have no more specific information
///    attached to them: their description must be hidden and related to
///    the group name;
///  - group 0 is a sort of null group, where no group information is known;
///  - all enum variant names for classifications belonging to a group (except
///    the null group) must be prefixed by the group name;
///  - for backward/forward-compatibility, numbers should not be reassigned.
///
/// The Description and HiddenDescription enum properties define a description
/// of the class. When Description is used, the description is prefixed before
/// the error message; when HiddenDescription is used, the message is not
/// prefixed, and should thus be sufficiently specific to not need it. The
/// latter is useful to reduce the amount of redundant information in a
/// message.
///
/// The numbering within some groups has gaps (for instance 1003, 1005, 2006
/// and 3002 through 3004 are unassigned). Presumably these are retired codes
/// that are kept free because of the no-reassignment rule above; confirm
/// against the project history before reusing any of them.
#[derive(
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    strum_macros::EnumIter,
    strum_macros::EnumProperty,
    num_derive::FromPrimitive,
    Default,
)]
pub enum Classification {
    // Unclassified diagnostics (group 0).
    #[strum(props(HiddenDescription = "unclassified diagnostic"))]
    #[default]
    Unclassified = 0,

    #[strum(props(Description = "not yet implemented"))]
    NotYetImplemented = 1,

    #[strum(props(Description = "illegal value"))]
    IllegalValue = 2,

    #[strum(props(Description = "illegal value in hint"))]
    IllegalValueInHint = 3,

    #[strum(props(Description = "illegal URI"))]
    IllegalUri = 4,

    #[strum(props(Description = "illegal glob"))]
    IllegalGlob = 5,

    #[strum(props(Description = "deprecation"))]
    Deprecation = 6,

    #[strum(props(HiddenDescription = "versioning"))]
    Versioning = 7,

    #[strum(props(HiddenDescription = "experimental"))]
    Experimental = 999,

    // Protobuf-related diagnostics (group 1).
    #[strum(props(HiddenDescription = "protobuf-related diagnostic"))]
    Proto = 1000,

    #[strum(props(HiddenDescription = "protobuf parsing failed"))]
    ProtoParseFailed = 1001,

    #[strum(props(Description = "missing required protobuf field"))]
    ProtoMissingField = 1002,

    #[strum(props(Description = "encountered a protobuf \"any\""))]
    ProtoAny = 1004,

    #[strum(props(Description = "missing protobuf \"any\" declaration"))]
    ProtoMissingAnyDeclaration = 1006,

    // YAML-related diagnostics (group 2).
    #[strum(props(HiddenDescription = "YAML-related diagnostic"))]
    Yaml = 2000,

    #[strum(props(Description = "did not attempt to resolve YAML"))]
    YamlResolutionDisabled = 2001,

    #[strum(props(Description = "failed to resolve YAML"))]
    YamlResolutionFailed = 2002,

    #[strum(props(Description = "failed to parse YAML"))]
    YamlParseFailed = 2003,

    #[strum(props(Description = "YAML does not conform to schema"))]
    YamlSchemaValidationFailed = 2004,

    #[strum(props(Description = "missing required YAML key"))]
    YamlMissingKey = 2005,

    #[strum(props(Description = "missing required YAML array element"))]
    YamlMissingElement = 2007,

    #[strum(props(Description = "invalid YAML value type"))]
    YamlInvalidType = 2008,

    #[strum(props(Description = "cyclic dependency"))]
    YamlCyclicDependency = 2009,

    // Link resolution diagnostics (group 3).
    #[strum(props(HiddenDescription = "link resolution diagnostic"))]
    Link = 3000,

    #[strum(props(Description = "failed to resolve anchor"))]
    LinkMissingAnchor = 3001,

    #[strum(props(HiddenDescription = "use of anchor zero"))]
    LinkAnchorZero = 3005,

    #[strum(props(Description = "failed to resolve type variation name & class pair"))]
    LinkMissingTypeVariationNameAndClass = 3006,

    #[strum(props(Description = "unresolved name lookup"))]
    LinkUnresolvedName = 3007,

    #[strum(props(Description = "ambiguous name lookup"))]
    LinkAmbiguousName = 3008,

    #[strum(props(Description = "duplicate definition"))]
    LinkDuplicateDefinition = 3009,

    #[strum(props(HiddenDescription = "invalid compound vs. simple function name usage"))]
    LinkCompoundVsSimpleFunctionName = 3010,

    // Type-related diagnostics (group 4).
    #[strum(props(HiddenDescription = "type-related diagnostics"))]
    Type = 4000,

    #[strum(props(Description = "unknown type"))]
    TypeUnknown = 4001,

    #[strum(props(Description = "mismatched type parameters"))]
    TypeMismatchedParameters = 4002,

    #[strum(props(Description = "mismatched field name associations"))]
    TypeMismatchedFieldNameAssociations = 4003,

    #[strum(props(Description = "invalid swizzle operation"))]
    TypeInvalidSwizzle = 4004,

    #[strum(props(Description = "mismatched types"))]
    TypeMismatch = 4005,

    #[strum(props(Description = "struct type is required"))]
    TypeStructRequired = 4006,

    #[strum(props(Description = "mismatched type variation"))]
    TypeMismatchedVariation = 4007,

    #[strum(props(Description = "mismatched nullability"))]
    TypeMismatchedNullability = 4008,

    #[strum(props(Description = "invalid type pattern or derivation expression"))]
    TypeDerivationInvalid = 4009,

    // Note the difference between above and below! Above should be used when
    // the derivation itself is invalid due to syntax or metatype errors, or in
    // other words, when it could *never* match or evaluate, regardless of
    // context. Below is used when the derivation itself appears to be sane,
    // but it does not apply to the given context. From a user perspective,
    // above means that the YAML is wrong, while below means that a function
    // is used incorrectly in a plan. Note that we cannot detect all problems
    // with type derivation expressions without evaluating them because they
    // are dynamically typed.
    #[strum(props(
        Description = "type pattern or derivation expression failed to match or evaluate"
    ))]
    TypeDerivationFailed = 4010,

    #[strum(props(Description = "parse error in type pattern or derivation expression"))]
    TypeParseError = 4011,

    #[strum(props(
        Description = "name resolution error in type pattern or derivation expression"
    ))]
    TypeResolutionError = 4012,

    #[strum(props(Description = "invalid field name"))]
    TypeInvalidFieldName = 4013,

    #[strum(props(Description = "unsupported type pattern or derivation construct"))]
    TypeDerivationNotSupported = 4014,

    // Relation-related diagnostics (group 5).
    #[strum(props(HiddenDescription = "relation-related diagnostics"))]
    Relation = 5000,

    #[strum(props(Description = "missing root relation"))]
    RelationRootMissing = 5001,

    #[strum(props(Description = "missing relation"))]
    RelationMissing = 5002,

    #[strum(props(Description = "invalid relation"))]
    RelationInvalid = 5003,

    // Expression-related diagnostics (group 6).
    #[strum(props(HiddenDescription = "expression-related diagnostics"))]
    Expression = 6000,

    #[strum(props(Description = "field reference into non-existent stream"))]
    ExpressionFieldRefMissingStream = 6001,

    #[strum(props(Description = "illegal literal value"))]
    ExpressionIllegalLiteralValue = 6002,

    #[strum(props(Description = "function definition unavailable"))]
    ExpressionFunctionDefinitionUnavailable = 6003,

    #[strum(props(Description = "illegal subquery"))]
    ExpressionIllegalSubquery = 6004,

    // Redundant declarations (group 7).
    #[strum(props(
        HiddenDescription = "diagnostics for pointing out parts of the plan that can be removed without changing its semantics"
    ))]
    Redundant = 7000,

    #[strum(props(Description = "redundant protobuf \"any\" declaration"))]
    RedundantProtoAnyDeclaration = 7001,

    // NOTE(review): the variant name below is misspelled ("Defition" instead
    // of "Definition"). It is left untouched here because renaming a public
    // variant would break existing users of the name.
    #[strum(props(Description = "redundant extension URI definition"))]
    RedundantExtensionDefition = 7002,

    #[strum(props(Description = "redundant function declaration"))]
    RedundantFunctionDeclaration = 7003,

    #[strum(props(Description = "redundant type declaration"))]
    RedundantTypeDeclaration = 7004,

    #[strum(props(Description = "redundant type variation declaration"))]
    RedundantTypeVariationDeclaration = 7005,

    #[strum(props(Description = "redundant list slice"))]
    RedundantListSlice = 7006,

    #[strum(props(Description = "redundant field"))]
    RedundantField = 7007,

    #[strum(props(Description = "redundant enum variant"))]
    RedundantEnumVariant = 7008,
}
372
373impl Classification {
374    /// Returns the complete code for this classification.
375    pub fn code(&self) -> u32 {
376        *self as u32
377    }
378
379    /// Returns the name of the classiciation.
380    pub fn name(&self) -> String {
381        format!("{:?}", self)
382    }
383
384    /// Returns the group code for this classification.
385    pub fn group_code(&self) -> u32 {
386        (*self as u32) / 1000
387    }
388
389    /// Returns the group variant for this classification.
390    pub fn group(&self) -> Classification {
391        Self::from_group(self.group_code())
392            .unwrap_or_else(|| panic!("missing group for {:?}", self))
393    }
394
395    /// Returns the code for this classification within its group.
396    pub fn sub_code(&self) -> u32 {
397        (*self as u32) % 1000
398    }
399
400    /// Returns the description of this classification.
401    pub fn description(&self) -> &str {
402        self.get_str("Description")
403            .or_else(|| self.get_str("HiddenDescription"))
404            .unwrap_or_else(|| {
405                panic!(
406                    "missing Description or HiddenDescription property for {:?}",
407                    self
408                )
409            })
410    }
411
412    /// Returns the classification associated with the given code, if any.
413    pub fn from_code(code: u32) -> Option<Self> {
414        Self::from_u32(code)
415    }
416
417    /// Returns the group classification associated with the given code, if
418    /// any.
419    pub fn group_from_code(code: u32) -> Option<Self> {
420        Self::from_group(code / 1000)
421    }
422
423    /// Returns the group classification associated with the given group.
424    pub fn from_group(group: u32) -> Option<Self> {
425        Self::from_u32(group * 1000)
426    }
427
428    /// Returns the "parent" code for the given code. For non-group codes, this
429    /// is the code of their group (code rounded down to thousands). For group
430    /// codes, this is 0.
431    pub fn parent(code: u32) -> u32 {
432        if code % 1000 != 0 {
433            (code / 1000) * 1000
434        } else {
435            0
436        }
437    }
438
439    /// Formats a Message with this classification.
440    pub fn format_message(
441        &self,
442        message: &Message,
443        f: &mut std::fmt::Formatter,
444    ) -> std::fmt::Result {
445        if let Some(description) = self.get_str("Description") {
446            write!(f, "{description}: ")?;
447        }
448        write!(f, "{message} (code {:04})", self.code())
449    }
450}
451
452impl From<Classification> for u32 {
453    /// Converts a Classification into its error code.
454    fn from(classification: Classification) -> Self {
455        classification.code()
456    }
457}
458
/// Description of the cause of a diagnostic.
///
/// The message is held behind an [`Arc`], so cloning a cause shares the
/// message rather than copying it.
#[derive(Clone, Debug, thiserror::Error)]
pub struct Cause {
    /// The error message. Within this crate we don't bother typing these
    /// beyond the Classification enum, but we do retain typing information for
    /// messages from other crates.
    pub message: Arc<Message>,

    /// Classification of this cause. This attaches an error code and generic
    /// message for said code to the diagnostic message. The user can use these
    /// codes to for instance always promote a particular type of diagnostic to
    /// an error (like gcc -Werror).
    pub classification: Classification,
}
473
474impl PartialEq for Cause {
475    fn eq(&self, other: &Self) -> bool {
476        self.message.to_string() == other.message.to_string()
477            && self.classification == other.classification
478    }
479}
480
481impl std::fmt::Display for Cause {
482    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
483        self.classification.format_message(&self.message, f)
484    }
485}
486
487impl Cause {
488    /// Prefixes the message with context information.
489    pub fn prefix<S: AsRef<str>>(self, prefix: S) -> Cause {
490        Cause {
491            message: Arc::new(Message::from(format!(
492                "{}: {}",
493                prefix.as_ref(),
494                self.message
495            ))),
496            classification: self.classification,
497        }
498    }
499}
500
/// Convenience/shorthand macro for creating a diagnostic
/// [`Cause`](crate::output::diagnostic::Cause). Use this variant when you
/// have something that can be cast into a Message via into(), like a
/// pre-formatted string or a compatible Error type from a dependency.
///
/// The first argument is the name of a Classification variant (without the
/// enum path); the second is the expression that is converted to the message.
macro_rules! ecause {
    ($class:ident, $message:expr) => {
        crate::output::diagnostic::Cause {
            message: std::sync::Arc::new($message.into()),
            classification: crate::output::diagnostic::Classification::$class,
        }
    };
}
512
/// Convenience/shorthand macro for creating a diagnostic cause. Use this
/// variant when you want to format a string. The argument list beyond the
/// diagnostic class identifier is passed straight to [`format!`], and the
/// formatted string is then handed to `ecause!`.
///
/// Note that the arguments are matched as comma-separated expressions
/// without a trailing comma.
macro_rules! cause {
    ($class:ident, $($args:expr),*) => {
        ecause!($class, format!($($args),*))
    };
}
521
/// Result type for diagnostic causes: the error variant carries a [`Cause`],
/// i.e. a classified message without severity level or path.
pub type Result<T> = std::result::Result<T, Cause>;
524
/// Error level for a diagnostic message.
///
/// The ordering of the levels is derived from the order in which the
/// variants are declared, so `Info < Warning < Error`. Do not reorder the
/// variants: comparisons between levels rely on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Level {
    /// Level used for diagnostics that don't point out anything wrong with
    /// the plan, and merely provide additional information.
    Info,

    /// Level used for diagnostics that may or may not indicate that there
    /// is something wrong with the plan, i.e. the plan *could* be valid,
    /// but the validator isn't sure.
    Warning,

    /// Level used for diagnostics that indicate that there is definitely
    /// something wrong with the plan.
    Error,
}
541
/// A diagnostic message, without configuration-based level override.
///
/// When displayed, this prints the level, the path (omitted when the
/// alternate `{:#}` format is requested), and the cause.
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
pub struct RawDiagnostic {
    /// The cause of the diagnostic.
    pub cause: Cause,

    /// The severity of the diagnostic.
    pub level: Level,

    /// The path within the protobuf message where the diagnostic occurred.
    pub path: path::PathBuf,
}
554
555impl std::fmt::Display for RawDiagnostic {
556    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
557        write!(f, "{:?}", self.level)?;
558        if !f.alternate() {
559            write!(f, " at {}", self.path)?;
560        }
561        write!(f, ": {}", self.cause)
562    }
563}
564
/// A diagnostic message, including configuration-based level override.
///
/// When displayed, this prints the adjusted level, a note when that level
/// differs from the original one, the path (omitted when the alternate
/// `{:#}` format is requested), and the cause.
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
pub struct Diagnostic {
    /// The cause of the diagnostic.
    pub cause: Cause,

    /// The original severity of the diagnostic.
    pub original_level: Level,

    /// The severity of the diagnostic after application of configuration.
    pub adjusted_level: Level,

    /// The path within the protobuf message where the diagnostic occurred.
    pub path: path::PathBuf,
}
580
581impl std::fmt::Display for Diagnostic {
582    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
583        write!(f, "{:?}", self.adjusted_level)?;
584        match self.original_level.cmp(&self.adjusted_level) {
585            std::cmp::Ordering::Less => write!(f, " (upgraded from {:?})", self.original_level)?,
586            std::cmp::Ordering::Equal => {}
587            std::cmp::Ordering::Greater => {
588                write!(f, " (downgraded from {:?})", self.original_level)?
589            }
590        }
591        if !f.alternate() {
592            write!(f, " at {}", self.path)?;
593        }
594        write!(f, ": {}", self.cause)
595    }
596}
597
598impl RawDiagnostic {
599    /// Converts to an AdjustedDiagnostic by adding an adjusted level.
600    pub fn adjust_level(self, adjusted_level: Level) -> Diagnostic {
601        Diagnostic {
602            cause: self.cause,
603            original_level: self.level,
604            adjusted_level,
605            path: self.path,
606        }
607    }
608}
609
/// Convenience/shorthand macro for creating diagnostics
/// ([`RawDiagnostic`](crate::output::diagnostic::RawDiagnostic)).
///
/// There are two forms, both starting with the path expression and the name
/// of a Level variant:
///  - followed by the name of a Classification variant and format arguments:
///    the cause is built via `cause!`;
///  - followed by a single expression: that expression is used as the cause.
macro_rules! diag {
    // Classification + format arguments: build the cause, then defer to the
    // arm below. This arm must come first, as the arms are tried in order.
    ($path:expr, $level:ident, $class:ident, $($args:expr),*) => {
        diag!($path, $level, cause!($class, $($args),*))
    };
    // Ready-made cause expression.
    ($path:expr, $level:ident, $cause:expr) => {
        crate::output::diagnostic::RawDiagnostic {
            cause: $cause,
            level: crate::output::diagnostic::Level::$level,
            path: $path
        }
    };
}
623/*macro_rules! ediag {
624    ($path:expr, $level:ident, $class:ident, $err:expr) => {
625        diag!($path, $level, ecause!($class, $err))
626    };
627}*/
628
/// Result type for complete diagnostics, including path: the error variant
/// carries a [`RawDiagnostic`], i.e. cause, level, and path.
pub type DiagResult<T> = std::result::Result<T, RawDiagnostic>;
631
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use strum::IntoEnumIterator;

    /// Checks the rules that the classification definitions must adhere to:
    /// every classification must belong to an existing group, must carry the
    /// name of its group as prefix (except within the null group), and must
    /// have a description that no other classification uses.
    #[test]
    fn test_diagnostic_classifications() {
        let mut seen_descriptions = HashSet::new();
        for class in Classification::iter() {
            // Panics by itself if the group variant does not exist.
            let group = class.group();

            let prefix_ok = group == Classification::Unclassified
                || class.name().starts_with(&group.name());
            assert!(
                prefix_ok,
                "incorrect group prefix for {:?}, should start with {:?}",
                class,
                group
            );

            let description = class.description().to_string();
            let is_new = seen_descriptions.insert(description);
            assert!(is_new, "duplicate description for {:?}", class);
        }
    }
}