substrait_validator/input/config.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! This module provides the configuration structure for the validator.
4//!
5//! This structure, [`Config`], is to be constructed by the application using
6//! the validator to configure it. Alternatively, the default configuration can
7//! be constructed by using the [`std::default::Default`] trait.
8
9use crate::output::diagnostic;
10pub use glob;
11use std::collections::HashMap;
12
/// Owned, type-erased handle to a read-only byte buffer; the bytes are
/// accessed through [`AsRef<[u8]>`](AsRef).
pub type BinaryData = Box<dyn AsRef<[u8]>>;

/// Owned, type-erased error value.
pub type ErrorData = Box<dyn std::error::Error>;

/// Boxed callback used to resolve/download a URI: it receives the URI as a
/// string slice and yields either the fetched bytes or an error. The callback
/// must be [`Send`].
pub type UriResolver = Box<dyn Fn(&str) -> Result<BinaryData, ErrorData> + Send>;
21
/// Attempts to resolve and fetch the data for the given URI using libcurl,
/// allowing the validator to handle remote YAML extension URLs with most
/// protocols.
///
/// Returns the complete response body on success. Any libcurl failure is
/// returned as a [`curl::Error`]; this includes HTTP responses with a status
/// code of 400 or higher, which are reported as errors instead of handing the
/// server's error page back to the caller as if it were the requested data.
#[cfg(feature = "curl")]
fn resolve_with_curl(uri: &str) -> Result<Vec<u8>, curl::Error> {
    let mut binary_data: Vec<u8> = Vec::new();
    let mut curl_handle = curl::easy::Easy::new();
    curl_handle.url(uri)?;

    // By default libcurl treats e.g. a 404 response as a successful transfer
    // and delivers the error document as the body. Make such responses fail.
    curl_handle.fail_on_error(true)?;

    {
        // The transfer mutably borrows `binary_data` through the write
        // callback; the inner scope ends that borrow before the buffer is
        // returned.
        let mut transfer = curl_handle.transfer();
        transfer.write_function(|buf| {
            binary_data.extend_from_slice(buf);
            // Report the whole chunk as consumed so libcurl keeps going.
            Ok(buf.len())
        })?;
        transfer.perform()?;
    }
    Ok(binary_data)
}
40
41/// Configuration structure.
42pub struct Config {
43 /// When set, do not generate warnings for unknown protobuf fields that are
44 /// set to their protobuf-defined default value.
45 pub ignore_unknown_fields: bool,
46
47 /// Protobuf message URLs that are explicitly allowed for use in "any"
48 /// messages, i.e. that the caller warrants the existence of in the
49 /// consumer that the plan is validated for.
50 pub allowed_proto_any_urls: Vec<glob::Pattern>,
51
52 /// Allows the level of diagnostic messages to be overridden based on their
53 /// classification/code. The logic for this is as follows:
54 ///
55 /// - if an entry exists for the classication of the incoming diagnostic,
56 /// override its error level to at most the second argument, and then to
57 /// at least the first argument. Otherwise,
58 /// - if an entry exists for the group of said classification, use its
59 /// level limits instead. Otherwise,
60 /// - if an entry exists for Unclassified (code 0), use its level limits
61 /// instead. Otherwise, do not adjust the level.
62 ///
63 /// Note that setting an entry to (Info, Error) leaves the diagnostic
64 /// level unchanged.
65 pub diagnostic_level_overrides:
66 HashMap<diagnostic::Classification, (diagnostic::Level, diagnostic::Level)>,
67
68 /// Allows URIs from the plan to be remapped (Some(mapping)) or ignored
69 /// (None). All resolution can effectively be disabled by just adding a
70 /// rule that maps * to None. Furthermore, in the absence of a custom
71 /// yaml_uri_resolver function, this can be used to remap URIs to
72 /// pre-downloaded files.
73 pub uri_overrides: Vec<(glob::Pattern, Option<String>)>,
74
75 /// Optional callback function for resolving URIs. If specified, all URIs
76 /// (after processing yaml_uri_overrides) are resolved using this function.
77 /// The function takes the URI as its argument, and should either return the
78 /// download contents as a `Vec<u8>` or return a String-based error. If no
79 /// downloader is specified, only file:// URLs with an absolute path are
80 /// supported.
81 pub uri_resolver: Option<UriResolver>,
82
83 /// Optional URI resolution depth. If specified, dependencies are only
84 /// resolved this many levels deep. Setting this to zero effectively
85 /// disables extension URI resolution altogether.
86 pub max_uri_resolution_depth: Option<usize>,
87}
88
89// TODO: enable URI resolution by default once all that works. Then this can
90// be derived again. Also still need to expose the depth option in extensions.
91impl Default for Config {
92 fn default() -> Self {
93 Self {
94 ignore_unknown_fields: Default::default(),
95 allowed_proto_any_urls: Default::default(),
96 diagnostic_level_overrides: Default::default(),
97 uri_overrides: Default::default(),
98 uri_resolver: Default::default(),
99 max_uri_resolution_depth: Some(0),
100 }
101 }
102}
103
104impl Config {
105 /// Creates a default configuration.
106 pub fn new() -> Self {
107 Self::default()
108 }
109
110 /// Instructs the validator to ignore protobuf fields that it doesn't know
111 /// about yet (i.e., that have been added to the Substrait protobuf
112 /// descriptions, but haven't yet been implemented in the validator) if the
113 /// fields are set to their default value. If this option isn't set, or if
114 /// an unknown field is not set to its default value, a warning is emitted.
115 pub fn ignore_unknown_fields(&mut self) {
116 self.ignore_unknown_fields = true;
117 }
118
119 /// Explicitly allows a protobuf message type to be used in advanced
120 /// extensions, despite the fact that the validator can't validate it. If
121 /// an advanced extension is encountered that isn't explicitly allowed, a
122 /// warning is emitted.
123 pub fn allow_proto_any_url(&mut self, pattern: glob::Pattern) {
124 self.allowed_proto_any_urls.push(pattern);
125 }
126
127 /// Sets a minimum and/or maximum error level for the given class of
128 /// diagnostic messages. Any previous settings for this class are
129 /// overridden.
130 pub fn override_diagnostic_level(
131 &mut self,
132 class: diagnostic::Classification,
133 minimum: diagnostic::Level,
134 maximum: diagnostic::Level,
135 ) {
136 self.diagnostic_level_overrides
137 .insert(class, (minimum, maximum));
138 }
139
140 /// Overrides the resolution behavior for (YAML) URIs matching the given
141 /// pattern. If resolve_as is None, the URI file will not be resolved;
142 /// if it is Some(s), it will be resolved as if the URI in the plan had
143 /// been s.
144 pub fn override_uri<S: Into<String>>(&mut self, pattern: glob::Pattern, resolve_as: Option<S>) {
145 self.uri_overrides
146 .push((pattern, resolve_as.map(|s| s.into())));
147 }
148
149 /// Registers a URI resolution function with this configuration. If
150 /// the given function fails, any previously registered function will be
151 /// used as a fallback.
152 pub fn add_uri_resolver<F, D, E>(&mut self, resolver: F)
153 where
154 F: Fn(&str) -> Result<D, E> + Send + 'static,
155 D: AsRef<[u8]> + 'static,
156 E: std::error::Error + 'static,
157 {
158 let previous = self.uri_resolver.take();
159 self.uri_resolver = Some(Box::new(move |uri| match resolver(uri) {
160 Ok(d) => Ok(Box::new(d)),
161 Err(e) => match &previous {
162 Some(f) => f.as_ref()(uri),
163 None => Err(Box::new(e)),
164 },
165 }));
166 }
167
168 /// Registers a URI resolver based on libcurl. If libcurl fails, any
169 /// `uri_resolver` registered previously will be used as a fallback.
170 #[cfg(feature = "curl")]
171 pub fn add_curl_uri_resolver(&mut self) {
172 self.add_uri_resolver(resolve_with_curl)
173 }
174
175 /// Sets the maximum recursion depth for URI resolution, in the presence of
176 /// transitive dependencies. Setting this to None disables the limit,
177 /// setting this to zero disables URI resolution entirely.
178 pub fn set_max_uri_resolution_depth(&mut self, depth: Option<usize>) {
179 self.max_uri_resolution_depth = depth;
180 }
181}