// SPDX-License-Identifier: Apache-2.0

//! This module provides the configuration structure for the validator.
//!
//! This structure, [`Config`], is to be constructed by the application using
//! the validator to configure it. Alternatively, the default configuration can
//! be constructed by using the [`std::default::Default`] trait.

use crate::output::diagnostic;
pub use glob;
use std::collections::HashMap;

/// Trait object representing some immutable binary data.
///
/// Any value implementing `AsRef<[u8]>` can be boxed into this type; the
/// bytes are then read through `as_ref()`.
pub type BinaryData = Box<dyn AsRef<[u8]>>;

/// Trait object representing some error data.
///
/// Any value implementing [`std::error::Error`] can be boxed into this type.
pub type ErrorData = Box<dyn std::error::Error>;

/// Callback function type for resolving/downloading URIs.
///
/// The callback receives the URI as a string slice and returns either the
/// fetched contents as [`BinaryData`] or the failure as [`ErrorData`]. The
/// boxed callback is required to be [`Send`].
pub type UriResolver = Box<dyn Fn(&str) -> std::result::Result<BinaryData, ErrorData> + Send>;

/// Downloads the contents behind `uri` using libcurl and returns them as a
/// byte vector. This lets the validator fetch remote YAML extension files
/// over whichever protocols the linked libcurl supports.
///
/// # Errors
///
/// Any error reported by libcurl while setting the URL, installing the write
/// callback, or performing the transfer is returned as-is.
#[cfg(feature = "curl")]
fn resolve_with_curl(uri: &str) -> Result<Vec<u8>, curl::Error> {
    let mut payload: Vec<u8> = Vec::new();
    let mut easy = curl::easy::Easy::new();
    easy.url(uri)?;

    let mut transfer = easy.transfer();
    transfer.write_function(|chunk| {
        // Accumulate every received chunk and report it as fully consumed.
        payload.extend_from_slice(chunk);
        Ok(chunk.len())
    })?;
    transfer.perform()?;
    // The transfer holds a mutable borrow of `payload` through the write
    // callback; release it before handing the buffer back to the caller.
    drop(transfer);

    Ok(payload)
}

/// Configuration structure.
///
/// All fields are public and may be set directly; the methods on [`Config`]
/// offer a more convenient way to fill them in.
pub struct Config {
    /// When set, do not generate warnings for unknown protobuf fields that are
    /// set to their protobuf-defined default value.
    pub ignore_unknown_fields: bool,

    /// Protobuf message URLs that are explicitly allowed for use in "any"
    /// messages, i.e. that the caller warrants the existence of in the
    /// consumer that the plan is validated for.
    pub allowed_proto_any_urls: Vec<glob::Pattern>,

    /// Allows the level of diagnostic messages to be overridden based on their
    /// classification/code. Each entry maps a classification to a
    /// `(minimum, maximum)` pair of levels. The logic for this is as follows:
    ///
    ///  - if an entry exists for the classification of the incoming
    ///    diagnostic, override its error level to at most the second element
    ///    (the maximum), and then to at least the first element (the
    ///    minimum). Otherwise,
    ///  - if an entry exists for the group of said classification, use its
    ///    level limits instead. Otherwise,
    ///  - if an entry exists for Unclassified (code 0), use its level limits
    ///    instead. Otherwise, do not adjust the level.
    ///
    /// Note that setting an entry to `(Info, Error)` leaves the diagnostic
    /// level unchanged.
    pub diagnostic_level_overrides:
        HashMap<diagnostic::Classification, (diagnostic::Level, diagnostic::Level)>,

    /// Allows URIs from the plan to be remapped (`Some(mapping)`) or ignored
    /// (`None`). All resolution can effectively be disabled by just adding a
    /// rule that maps `*` to `None`. Furthermore, in the absence of a custom
    /// `uri_resolver` function, this can be used to remap URIs to
    /// pre-downloaded files.
    pub uri_overrides: Vec<(glob::Pattern, Option<String>)>,

    /// Optional callback function for resolving URIs. If specified, all
    /// URIs (after processing `uri_overrides`) are resolved using this
    /// function. The function takes the URI as its argument, and should either
    /// return the downloaded contents as [`BinaryData`] or return the failure
    /// as [`ErrorData`]. If no resolver is specified, only file:// URLs with
    /// an absolute path are supported.
    pub uri_resolver: Option<UriResolver>,

    /// Optional URI resolution depth. If specified, dependencies are only
    /// resolved this many levels deep. Setting this to zero effectively
    /// disables extension URI resolution altogether; `None` means no limit
    /// is imposed.
    pub max_uri_resolution_depth: Option<usize>,
}

// TODO: enable URI resolution by default once all that works. Then this can
// be derived again. Also still need to expose the depth option in extensions.
impl Default for Config {
    fn default() -> Self {
        Self {
            ignore_unknown_fields: Default::default(),
            allowed_proto_any_urls: Default::default(),
            diagnostic_level_overrides: Default::default(),
            uri_overrides: Default::default(),
            uri_resolver: Default::default(),
            max_uri_resolution_depth: Some(0),
        }
    }
}

impl Config {
    /// Returns a configuration with the default settings; equivalent to
    /// [`Config::default`].
    pub fn new() -> Self {
        Default::default()
    }

    /// Makes the validator stay silent about protobuf fields it does not
    /// know yet (fields present in the Substrait protobuf descriptions but
    /// not implemented in the validator), provided those fields carry their
    /// default value. Without this option, or when such a field holds a
    /// non-default value, a warning is emitted.
    pub fn ignore_unknown_fields(&mut self) {
        self.ignore_unknown_fields = true;
    }

    /// Adds `pattern` to the list of protobuf message types that may appear
    /// in advanced extensions even though the validator cannot validate
    /// them. Advanced extensions that match none of the allowed patterns
    /// produce a warning.
    pub fn allow_proto_any_url(&mut self, pattern: glob::Pattern) {
        self.allowed_proto_any_urls.push(pattern);
    }

    /// Clamps the error level of diagnostics of the given class to the range
    /// `minimum..=maximum`. A later call for the same class replaces the
    /// limits set by an earlier one.
    pub fn override_diagnostic_level(
        &mut self,
        class: diagnostic::Classification,
        minimum: diagnostic::Level,
        maximum: diagnostic::Level,
    ) {
        let limits = (minimum, maximum);
        self.diagnostic_level_overrides.insert(class, limits);
    }

    /// Appends a resolution rule for (YAML) URIs matching `pattern`. With
    /// `resolve_as` set to `None` matching URIs are not resolved at all;
    /// with `Some(s)` they are resolved as though the plan had specified
    /// `s` instead.
    pub fn override_uri<S: Into<String>>(&mut self, pattern: glob::Pattern, resolve_as: Option<S>) {
        let target: Option<String> = resolve_as.map(Into::into);
        self.uri_overrides.push((pattern, target));
    }

    /// Installs `resolver` as the URI resolution function. A resolver that
    /// was registered earlier is kept as a fallback: it is consulted
    /// whenever `resolver` returns an error, and its result is then returned
    /// instead. Without a fallback, the error from `resolver` is returned.
    pub fn add_uri_resolver<F, D, E>(&mut self, resolver: F)
    where
        F: Fn(&str) -> Result<D, E> + Send + 'static,
        D: AsRef<[u8]> + 'static,
        E: std::error::Error + 'static,
    {
        // Move the existing resolver (if any) into the new closure so the
        // chain of fallbacks is preserved.
        let fallback = self.uri_resolver.take();
        self.uri_resolver = Some(Box::new(move |uri| {
            let failure = match resolver(uri) {
                Ok(data) => return Ok(Box::new(data)),
                Err(failure) => failure,
            };
            if let Some(previous) = fallback.as_ref() {
                previous(uri)
            } else {
                Err(Box::new(failure))
            }
        }));
    }

    /// Installs the libcurl-based resolver. Should libcurl fail for a URI,
    /// whichever `uri_resolver` was registered before serves as a fallback.
    #[cfg(feature = "curl")]
    pub fn add_curl_uri_resolver(&mut self) {
        self.add_uri_resolver(resolve_with_curl);
    }

    /// Limits how many levels of transitive dependencies are followed
    /// during URI resolution. `None` removes the limit; `Some(0)` turns URI
    /// resolution off entirely.
    pub fn set_max_uri_resolution_depth(&mut self, depth: Option<usize>) {
        self.max_uri_resolution_depth = depth;
    }
}