substrait_validator/parse/
plan.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Module providing toplevel parse/validation functions for plans.
4
5#![allow(clippy::ptr_arg)]
6
7use crate::input::proto::substrait;
8use crate::output::diagnostic;
9use crate::output::type_system::data;
10use crate::parse::context;
11use crate::parse::extensions;
12use crate::parse::relations;
13
14// Parse a relation root, i.e. a toplevel relation that includes field name
15// information.
16fn parse_rel_root(x: &substrait::RelRoot, y: &mut context::Context) -> diagnostic::Result<()> {
17    // Parse the fields.
18    let schema = proto_required_field!(x, y, input, relations::parse_rel)
19        .0
20        .data_type();
21    proto_repeated_field!(x, y, names);
22
23    // Relate the names to the schema.
24    let schema = schema
25        .apply_field_names(&x.names)
26        .map_err(|x| diagnostic!(y, Error, x))
27        .unwrap_or_default();
28    y.set_schema(schema);
29
30    // Describe the node.
31    describe!(y, Misc, "Named relation root");
32    summary!(y, "Attaches names to result schema");
33    Ok(())
34}
35
36// Parse a relation type.
37fn parse_rel_type(
38    x: &substrait::plan_rel::RelType,
39    y: &mut context::Context,
40) -> diagnostic::Result<data::Type> {
41    match x {
42        substrait::plan_rel::RelType::Rel(x) => {
43            relations::parse_rel(x, y)?;
44            Ok(y.data_type().strip_field_names())
45        }
46        substrait::plan_rel::RelType::Root(x) => {
47            parse_rel_root(x, y)?;
48            Ok(y.data_type())
49        }
50    }
51}
52
53/// Parse a PlanRel node.
54fn parse_plan_rel(x: &substrait::PlanRel, y: &mut context::Context) -> diagnostic::Result<()> {
55    let data_type = y.enter_relation_root(|y| {
56        proto_required_field!(x, y, rel_type, parse_rel_type)
57            .1
58            .unwrap_or_default()
59    });
60
61    // Describe the node.
62    y.set_data_type(data_type);
63    describe!(y, Misc, "Relation root");
64    Ok(())
65}
66
67/// Parse a git hash string.
68fn parse_git_hash(x: &String, y: &mut context::Context) -> diagnostic::Result<()> {
69    if !x.is_empty() {
70        static GIT_HASH_RE: once_cell::sync::Lazy<regex::Regex> =
71            once_cell::sync::Lazy::new(|| regex::Regex::new("[0-9a-f]{40}").unwrap());
72        if !GIT_HASH_RE.is_match(x) {
73            diagnostic!(
74                y,
75                Error,
76                IllegalValue,
77                "git hash must be a 40-character lowercase hexadecimal string \
78                if specified."
79            );
80        }
81        diagnostic!(
82            y,
83            Warning,
84            Versioning,
85            "a git hash was specified for the Substrait version, indicating \
86            use of nonstandard features. The validation result may not be \
87            accurate."
88        );
89    }
90    Ok(())
91}
92
93/// Parse a producer identification string.
94fn parse_producer_id(x: &String, y: &mut context::Context) -> diagnostic::Result<()> {
95    if x.is_empty() {
96        diagnostic!(
97            y,
98            Info,
99            Versioning,
100            "producer identifier is missing. While not strictly necessary, \
101            especially for hand-written plans, it is strongly recommended to \
102            include one. This allows consumers to work around unforeseen \
103            problems specific to your producer."
104        );
105    }
106    Ok(())
107}
108
109/// Parse a version node.
110fn parse_version(x: &substrait::Version, y: &mut context::Context) -> diagnostic::Result<()> {
111    // Parse the version information.
112    let major = proto_primitive_field!(x, y, major_number)
113        .1
114        .unwrap_or_default() as u64;
115    let minor = proto_primitive_field!(x, y, minor_number)
116        .1
117        .unwrap_or_default() as u64;
118    let patch = proto_primitive_field!(x, y, patch_number)
119        .1
120        .unwrap_or_default() as u64;
121    let version = semver::Version::new(major, minor, patch);
122    if version == semver::Version::new(0, 0, 0) {
123        diagnostic!(y, Error, Versioning, "invalid plan version (0.0.0)");
124    } else if !crate::substrait_version_req_loose().matches(&version) {
125        diagnostic!(
126            y,
127            Warning,
128            Versioning,
129            "plan version ({}) is not compatible with the Substrait \
130            version that this version of the validator validates ({}).",
131            version,
132            crate::substrait_version()
133        );
134    } else if !crate::substrait_version_req().matches(&version) {
135        diagnostic!(
136            y,
137            Warning,
138            Versioning,
139            "cannot automatically determine whether plan version ({}) is \
140            compatible with the Substrait version that this version of \
141            the validator validates ({}). Please check the release notes \
142            between these versions, or install the correct version of the \
143            validator. See also \
144            https://github.com/substrait-io/substrait/pull/210#discussion_r881965837",
145            version,
146            crate::substrait_version()
147        );
148    };
149
150    // Check hash.
151    proto_primitive_field!(x, y, git_hash, parse_git_hash);
152
153    // Check producer information.
154    proto_primitive_field!(x, y, producer, parse_producer_id);
155
156    Ok(())
157}
158
159/// Report the "validator is experimental" diagnostic.
160pub fn mark_experimental(ctx: &mut context::Context) {
161    diagnostic!(
162        ctx,
163        Info,
164        Experimental,
165        "this version of the validator is EXPERIMENTAL. Please report issues \
166        via https://github.com/substrait-io/substrait-validator/issues/new"
167    );
168}
169
170/// Toplevel parse function for a plan.
171pub fn parse_plan(plan: &substrait::Plan, ctx: &mut context::Context) {
172    mark_experimental(ctx);
173
174    // Parse the version.
175    proto_required_field!(plan, ctx, version, parse_version);
176
177    // Handle extensions first, because we'll need their declarations to
178    // correctly interpret the relations.
179    extensions::parse_plan(plan, ctx);
180
181    // Handle the relations.
182    let num_relations = proto_repeated_field!(plan, ctx, relations, parse_plan_rel)
183        .0
184        .len();
185    if num_relations == 0 {
186        diagnostic!(
187            ctx,
188            Error,
189            RelationRootMissing,
190            "a plan must have at least one relation"
191        );
192    }
193
194    // Generate an Info diagnostic for every extension definition that wasn't
195    // used at any point, and can thus be safely removed.
196    extensions::check_unused_definitions(ctx);
197}
198
199/// Toplevel validation function for a plan. Validates that the PlanVersion
200/// matches expected format, pushing errors to the `Context`.
201pub fn parse_plan_version(tree: &substrait::PlanVersion, ctx: &mut context::Context) {
202    mark_experimental(ctx);
203
204    // Parse the version.
205    proto_required_field!(tree, ctx, version, parse_version);
206}