substrait_validator/input/
config.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! This module provides the configuration structure for the validator.
4//!
5//! This structure, [`Config`], is to be constructed by the application using
6//! the validator to configure it. Alternatively, the default configuration can
7//! be constructed by using the [`std::default::Default`] trait.
8
9use crate::output::diagnostic;
10pub use glob;
11use std::collections::HashMap;
12
/// Owned, type-erased handle to read-only binary data: any value that can be
/// viewed as a `[u8]` slice can be boxed into this.
pub type BinaryData = Box<dyn AsRef<[u8]>>;

/// Owned, type-erased error value.
pub type ErrorData = Box<dyn std::error::Error>;

/// Boxed callback for resolving/downloading a URI. It receives the URI as a
/// string slice and yields either the fetched data or an error.
pub type UriResolver = Box<dyn Fn(&str) -> Result<BinaryData, ErrorData> + Send>;
21
/// Attempts to resolve and fetch the data for the given URI using libcurl,
/// allowing the validator to handle remote YAML extension URLs with most
/// protocols.
///
/// On success, returns all bytes that libcurl delivered for the transfer.
///
/// # Errors
///
/// Returns the [`curl::Error`] reported by libcurl when the URI is rejected,
/// when the transfer itself fails, or when the server answers with an HTTP
/// status code of 400 or higher.
#[cfg(feature = "curl")]
fn resolve_with_curl(uri: &str) -> Result<Vec<u8>, curl::Error> {
    let mut binary_data: Vec<u8> = Vec::new();
    let mut curl_handle = curl::easy::Easy::new();
    curl_handle.url(uri)?;

    // By default libcurl treats an HTTP error response (status >= 400) as a
    // successful transfer, which would hand the server's error page to the
    // caller as if it were the requested file. Report such responses as
    // errors instead.
    curl_handle.fail_on_error(true)?;

    {
        // The write callback mutably borrows `binary_data`; scoping the
        // transfer ends that borrow before the buffer is returned.
        let mut transfer = curl_handle.transfer();
        transfer.write_function(|buf| {
            binary_data.extend_from_slice(buf);
            // Report the whole chunk as consumed.
            Ok(buf.len())
        })?;
        transfer.perform()?;
    }

    Ok(binary_data)
}
40
/// Configuration structure for the validator.
///
/// All fields are public, so they can be written directly; the methods on
/// this type offer a more convenient way to set them.
pub struct Config {
    /// When set, do not generate warnings for unknown protobuf fields that are
    /// set to their protobuf-defined default value.
    pub ignore_unknown_fields: bool,

    /// Protobuf message URLs that are explicitly allowed for use in "any"
    /// messages, i.e. that the caller warrants the existence of in the
    /// consumer that the plan is validated for.
    pub allowed_proto_any_urls: Vec<glob::Pattern>,

    /// Allows the level of diagnostic messages to be overridden based on their
    /// classification/code. Each entry maps a classification to a
    /// `(minimum, maximum)` pair of levels. The logic for this is as follows:
    ///
    ///  - if an entry exists for the classification of the incoming
    ///    diagnostic, override its error level to at most the second element
    ///    of the pair, and then to at least the first element. Otherwise,
    ///  - if an entry exists for the group of said classification, use its
    ///    level limits instead. Otherwise,
    ///  - if an entry exists for Unclassified (code 0), use its level limits
    ///    instead. Otherwise, do not adjust the level.
    ///
    /// Note that setting an entry to (Info, Error) leaves the diagnostic
    /// level unchanged.
    pub diagnostic_level_overrides:
        HashMap<diagnostic::Classification, (diagnostic::Level, diagnostic::Level)>,

    /// Allows URIs from the plan to be remapped (Some(mapping)) or ignored
    /// (None). All resolution can effectively be disabled by just adding a
    /// rule that maps * to None. Furthermore, in the absence of a custom
    /// `uri_resolver` function, this can be used to remap URIs to
    /// pre-downloaded files.
    pub uri_overrides: Vec<(glob::Pattern, Option<String>)>,

    /// Optional callback function for resolving URIs. If specified, all URIs
    /// (after processing `uri_overrides`) are resolved using this function.
    /// The function takes the URI as its argument, and should either return
    /// the downloaded contents as [`BinaryData`] or return an error as
    /// [`ErrorData`]. If no downloader is specified, only file:// URLs with an
    /// absolute path are supported.
    pub uri_resolver: Option<UriResolver>,

    /// Optional URI resolution depth. If specified, dependencies are only
    /// resolved this many levels deep. Setting this to zero effectively
    /// disables extension URI resolution altogether.
    pub max_uri_resolution_depth: Option<usize>,
}
88
89// TODO: enable URI resolution by default once all that works. Then this can
90// be derived again. Also still need to expose the depth option in extensions.
91impl Default for Config {
92    fn default() -> Self {
93        Self {
94            ignore_unknown_fields: Default::default(),
95            allowed_proto_any_urls: Default::default(),
96            diagnostic_level_overrides: Default::default(),
97            uri_overrides: Default::default(),
98            uri_resolver: Default::default(),
99            max_uri_resolution_depth: Some(0),
100        }
101    }
102}
103
104impl Config {
105    /// Creates a default configuration.
106    pub fn new() -> Self {
107        Self::default()
108    }
109
110    /// Instructs the validator to ignore protobuf fields that it doesn't know
111    /// about yet (i.e., that have been added to the Substrait protobuf
112    /// descriptions, but haven't yet been implemented in the validator) if the
113    /// fields are set to their default value. If this option isn't set, or if
114    /// an unknown field is not set to its default value, a warning is emitted.
115    pub fn ignore_unknown_fields(&mut self) {
116        self.ignore_unknown_fields = true;
117    }
118
119    /// Explicitly allows a protobuf message type to be used in advanced
120    /// extensions, despite the fact that the validator can't validate it. If
121    /// an advanced extension is encountered that isn't explicitly allowed, a
122    /// warning is emitted.
123    pub fn allow_proto_any_url(&mut self, pattern: glob::Pattern) {
124        self.allowed_proto_any_urls.push(pattern);
125    }
126
127    /// Sets a minimum and/or maximum error level for the given class of
128    /// diagnostic messages. Any previous settings for this class are
129    /// overridden.
130    pub fn override_diagnostic_level(
131        &mut self,
132        class: diagnostic::Classification,
133        minimum: diagnostic::Level,
134        maximum: diagnostic::Level,
135    ) {
136        self.diagnostic_level_overrides
137            .insert(class, (minimum, maximum));
138    }
139
140    /// Overrides the resolution behavior for (YAML) URIs matching the given
141    /// pattern. If resolve_as is None, the URI file will not be resolved;
142    /// if it is Some(s), it will be resolved as if the URI in the plan had
143    /// been s.
144    pub fn override_uri<S: Into<String>>(&mut self, pattern: glob::Pattern, resolve_as: Option<S>) {
145        self.uri_overrides
146            .push((pattern, resolve_as.map(|s| s.into())));
147    }
148
149    /// Registers a URI resolution function with this configuration. If
150    /// the given function fails, any previously registered function will be
151    /// used as a fallback.
152    pub fn add_uri_resolver<F, D, E>(&mut self, resolver: F)
153    where
154        F: Fn(&str) -> Result<D, E> + Send + 'static,
155        D: AsRef<[u8]> + 'static,
156        E: std::error::Error + 'static,
157    {
158        let previous = self.uri_resolver.take();
159        self.uri_resolver = Some(Box::new(move |uri| match resolver(uri) {
160            Ok(d) => Ok(Box::new(d)),
161            Err(e) => match &previous {
162                Some(f) => f.as_ref()(uri),
163                None => Err(Box::new(e)),
164            },
165        }));
166    }
167
168    /// Registers a URI resolver based on libcurl. If libcurl fails, any
169    /// `uri_resolver` registered previously will be used as a fallback.
170    #[cfg(feature = "curl")]
171    pub fn add_curl_uri_resolver(&mut self) {
172        self.add_uri_resolver(resolve_with_curl)
173    }
174
175    /// Sets the maximum recursion depth for URI resolution, in the presence of
176    /// transitive dependencies. Setting this to None disables the limit,
177    /// setting this to zero disables URI resolution entirely.
178    pub fn set_max_uri_resolution_depth(&mut self, depth: Option<usize>) {
179        self.max_uri_resolution_depth = depth;
180    }
181}