stac_validate/
validator.rs

1use crate::{Error, Result};
2use async_recursion::async_recursion;
3use async_trait::async_trait;
4use fluent_uri::Uri;
5use jsonschema::{AsyncRetrieve, Resource, ValidationOptions, Validator as JsonschemaValidator};
6use reqwest::Client;
7use serde::Serialize;
8use serde_json::{Map, Value};
9use stac::{Type, Version};
10use std::collections::HashMap;
11use std::sync::Arc;
12
/// Root URL that is prepended to a type's spec path to form its core schema URI.
const SCHEMA_BASE: &str = "https://schemas.stacspec.org";
14
15/// A structure for validating STAC.
16pub struct Validator {
17    validators: HashMap<Uri<String>, JsonschemaValidator>,
18    validation_options: ValidationOptions<Arc<dyn referencing::AsyncRetrieve>>,
19}
20
21#[derive(Debug)]
22struct Retriever(Client);
23
24impl Validator {
25    /// Creates a new validator.
26    ///
27    /// # Examples
28    ///
29    /// ```
30    /// use stac_validate::Validator;
31    ///
32    /// #[tokio::main]
33    /// async fn main() {
34    ///     let validator = Validator::new().await.unwrap();
35    /// }
36    /// ```
37    pub async fn new() -> Result<Validator> {
38        let validation_options = jsonschema::async_options();
39        let validation_options = validation_options
40            .with_resources(prebuild_resources().into_iter())
41            .with_retriever(Retriever(
42                Client::builder().user_agent(crate::user_agent()).build()?,
43            ));
44        Ok(Validator {
45            validators: prebuild_validators(&validation_options).await,
46            validation_options,
47        })
48    }
49
50    /// Validates a single value.
51    ///
52    /// # Examples
53    ///
54    /// ```
55    /// use stac::Item;
56    /// use stac_validate::Validate;
57    ///
58    /// #[tokio::main]
59    /// async fn main() {
60    ///     let mut item = Item::new("an-id");
61    ///     item.validate().await.unwrap();
62    /// }
63    /// ```
64    pub async fn validate<T>(&mut self, value: &T) -> Result<()>
65    where
66        T: Serialize,
67    {
68        let value = serde_json::to_value(value)?;
69        let _ = self.validate_value(value).await?;
70        Ok(())
71    }
72
73    /// If you have a [serde_json::Value], you can skip a deserialization step by using this method.
74    #[async_recursion]
75    pub async fn validate_value(&mut self, value: Value) -> Result<Value> {
76        if let Value::Object(object) = value {
77            self.validate_object(object).await.map(Value::Object)
78        } else if let Value::Array(array) = value {
79            self.validate_array(array).await.map(Value::Array)
80        } else {
81            Err(Error::ScalarJson(value))
82        }
83    }
84
85    #[async_recursion]
86    async fn validate_array(&mut self, array: Vec<Value>) -> Result<Vec<Value>> {
87        let mut errors = Vec::new();
88        let mut new_array = Vec::with_capacity(array.len());
89        for value in array {
90            match self.validate_value(value).await {
91                Ok(value) => new_array.push(value),
92                Err(error) => {
93                    if let Error::Validation(e) = error {
94                        errors.extend(e);
95                    } else {
96                        return Err(error);
97                    }
98                }
99            }
100        }
101        if errors.is_empty() {
102            Ok(new_array)
103        } else {
104            Err(Error::Validation(errors))
105        }
106    }
107
108    #[async_recursion]
109    async fn validate_object(
110        &mut self,
111        mut object: Map<String, Value>,
112    ) -> Result<Map<String, Value>> {
113        let r#type = if let Some(r#type) = object.get("type").and_then(|v| v.as_str()) {
114            let r#type: Type = r#type.parse()?;
115            if r#type == Type::ItemCollection {
116                if let Some(features) = object.remove("features") {
117                    let features = self.validate_value(features).await?;
118                    let _ = object.insert("features".to_string(), features);
119                }
120                return Ok(object);
121            }
122            r#type
123        } else {
124            match object.remove("collections") {
125                Some(collections) => {
126                    let collections = self.validate_value(collections).await?;
127                    let _ = object.insert("collections".to_string(), collections);
128                    return Ok(object);
129                }
130                _ => {
131                    return Err(stac::Error::MissingField("type").into());
132                }
133            }
134        };
135
136        let version: Version = object
137            .get("stac_version")
138            .and_then(|v| v.as_str())
139            .map(|v| v.parse::<Version>())
140            .transpose()
141            .unwrap()
142            .ok_or(stac::Error::MissingField("stac_version"))?;
143
144        let uri = build_uri(r#type, &version);
145        let validator = self.validator(uri).await?;
146        let value = Value::Object(object);
147        let errors: Vec<_> = validator.iter_errors(&value).collect();
148        let object = if errors.is_empty() {
149            if let Value::Object(object) = value {
150                object
151            } else {
152                unreachable!()
153            }
154        } else {
155            return Err(Error::from_validation_errors(
156                errors.into_iter(),
157                Some(&value),
158            ));
159        };
160
161        self.validate_extensions(object).await
162    }
163
164    async fn validate_extensions(
165        &mut self,
166        object: Map<String, Value>,
167    ) -> Result<Map<String, Value>> {
168        match object
169            .get("stac_extensions")
170            .and_then(|value| value.as_array())
171            .cloned()
172        {
173            Some(stac_extensions) => {
174                let uris = stac_extensions
175                    .into_iter()
176                    .filter_map(|value| {
177                        if let Value::String(s) = value {
178                            Some(Uri::parse(s))
179                        } else {
180                            None
181                        }
182                    })
183                    .collect::<std::result::Result<Vec<_>, _>>()?;
184                self.ensure_validators(&uris).await?;
185
186                let mut errors = Vec::new();
187                let value = Value::Object(object);
188                for uri in uris {
189                    let validator = self
190                        .validator_opt(&uri)
191                        .expect("We already ensured they're present");
192                    errors.extend(validator.iter_errors(&value));
193                }
194                if errors.is_empty() {
195                    if let Value::Object(object) = value {
196                        Ok(object)
197                    } else {
198                        unreachable!()
199                    }
200                } else {
201                    Err(Error::from_validation_errors(
202                        errors.into_iter(),
203                        Some(&value),
204                    ))
205                }
206            }
207            _ => Ok(object),
208        }
209    }
210
211    async fn validator(&mut self, uri: Uri<String>) -> Result<&JsonschemaValidator> {
212        self.ensure_validator(&uri).await?;
213        Ok(self.validator_opt(&uri).unwrap())
214    }
215
216    async fn ensure_validators(&mut self, uris: &[Uri<String>]) -> Result<()> {
217        for uri in uris {
218            self.ensure_validator(uri).await?;
219        }
220        Ok(())
221    }
222
223    async fn ensure_validator(&mut self, uri: &Uri<String>) -> Result<()> {
224        if !self.validators.contains_key(uri) {
225            let client = reqwest::Client::new();
226            let response = client.get(uri.as_str()).send().await?.error_for_status()?;
227            let json_data = response.json().await?;
228            let validator = self
229                .validation_options
230                .build(&json_data)
231                .await
232                .map_err(Box::new)?;
233            let _ = self.validators.insert(uri.clone(), validator);
234        }
235        Ok(())
236    }
237
238    fn validator_opt(&self, uri: &Uri<String>) -> Option<&JsonschemaValidator> {
239        self.validators.get(uri)
240    }
241}
242
243#[async_trait]
244impl AsyncRetrieve for Retriever {
245    async fn retrieve(
246        &self,
247        uri: &Uri<String>,
248    ) -> std::result::Result<Value, Box<dyn std::error::Error + Send + Sync>> {
249        let response = self.0.get(uri.as_str()).send().await?.error_for_status()?;
250        let value = response.json().await?;
251        Ok(value)
252    }
253}
254
255fn build_uri(r#type: Type, version: &Version) -> Uri<String> {
256    Uri::parse(format!(
257        "{}{}",
258        SCHEMA_BASE,
259        r#type
260            .spec_path(version)
261            .expect("we shouldn't get here with an item collection")
262    ))
263    .unwrap()
264}
265
266async fn prebuild_validators(
267    validation_options: &ValidationOptions<Arc<dyn referencing::AsyncRetrieve>>,
268) -> HashMap<Uri<String>, JsonschemaValidator> {
269    use Type::*;
270    use Version::*;
271
272    let mut schemas = HashMap::new();
273
274    macro_rules! schema {
275        ($t:expr_2021, $v:expr_2021, $path:expr_2021, $schemas:expr_2021) => {
276            let url = build_uri($t, &$v);
277            let value = serde_json::from_str(include_str!($path)).unwrap();
278            let validator = validation_options.build(&value).await.unwrap();
279            let _ = schemas.insert(url, validator);
280        };
281    }
282
283    schema!(Item, v1_0_0, "schemas/v1.0.0/item.json", schemas);
284    schema!(Catalog, v1_0_0, "schemas/v1.0.0/catalog.json", schemas);
285    schema!(
286        Collection,
287        v1_0_0,
288        "schemas/v1.0.0/collection.json",
289        schemas
290    );
291    schema!(Item, v1_1_0, "schemas/v1.1.0/item.json", schemas);
292    schema!(Catalog, v1_1_0, "schemas/v1.1.0/catalog.json", schemas);
293    schema!(
294        Collection,
295        v1_1_0,
296        "schemas/v1.1.0/collection.json",
297        schemas
298    );
299
300    schemas
301}
302
303fn prebuild_resources() -> Vec<(String, Resource)> {
304    let mut resources = Vec::new();
305
306    macro_rules! resolve {
307        ($url:expr_2021, $path:expr_2021) => {
308            let _ = resources.push((
309                $url.to_string(),
310                Resource::from_contents(serde_json::from_str(include_str!($path)).unwrap())
311                    .unwrap(),
312            ));
313        };
314    }
315
316    // General
317    resolve!(
318        "https://geojson.org/schema/Feature.json",
319        "schemas/geojson/Feature.json"
320    );
321    resolve!(
322        "https://geojson.org/schema/Geometry.json",
323        "schemas/geojson/Geometry.json"
324    );
325    resolve!(
326        "http://json-schema.org/draft-07/schema",
327        "schemas/json-schema/draft-07.json"
328    );
329
330    // STAC v1.0.0
331    resolve!(
332        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/basics.json",
333        "schemas/v1.0.0/basics.json"
334    );
335    resolve!(
336        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json",
337        "schemas/v1.0.0/datetime.json"
338    );
339    resolve!(
340        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/instrument.json",
341        "schemas/v1.0.0/instrument.json"
342    );
343    resolve!(
344        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json",
345        "schemas/v1.0.0/item.json"
346    );
347    resolve!(
348        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/licensing.json",
349        "schemas/v1.0.0/licensing.json"
350    );
351    resolve!(
352        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/provider.json",
353        "schemas/v1.0.0/provider.json"
354    );
355
356    // STAC v1.1.0
357    resolve!(
358        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/bands.json",
359        "schemas/v1.1.0/bands.json"
360    );
361    resolve!(
362        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/basics.json",
363        "schemas/v1.1.0/basics.json"
364    );
365    resolve!(
366        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/common.json",
367        "schemas/v1.1.0/common.json"
368    );
369    resolve!(
370        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/data-values.json",
371        "schemas/v1.1.0/data-values.json"
372    );
373    resolve!(
374        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/datetime.json",
375        "schemas/v1.1.0/datetime.json"
376    );
377    resolve!(
378        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/instrument.json",
379        "schemas/v1.1.0/instrument.json"
380    );
381    resolve!(
382        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/item.json",
383        "schemas/v1.1.0/item.json"
384    );
385    resolve!(
386        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/licensing.json",
387        "schemas/v1.1.0/licensing.json"
388    );
389    resolve!(
390        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/provider.json",
391        "schemas/v1.1.0/provider.json"
392    );
393
394    resources
395}
396
#[cfg(test)]
mod tests {
    use super::Validator;
    use crate::Validate;
    use serde_json::json;
    use stac::{Collection, Item};

    /// The simple item example validates.
    #[tokio::test]
    async fn validate_simple_item() {
        let simple: Item = stac_io::read("examples/simple-item.json").unwrap();
        simple.validate().await.unwrap();
    }

    /// The extended item example validates when run inside a tokio runtime.
    #[tokio::test]
    async fn validate_inside_tokio_runtime() {
        let extended: Item = stac_io::read("examples/extended-item.json").unwrap();
        extended.validate().await.unwrap();
    }

    /// A JSON array of one hundred freshly created items validates.
    #[tokio::test]
    async fn validate_array() {
        let items: Vec<_> = (0..100)
            .map(|index| {
                let item = Item::new(format!("item-{index}"));
                serde_json::to_value(item).unwrap()
            })
            .collect();
        let mut validator = Validator::new().await.unwrap();
        validator.validate(&items).await.unwrap();
    }

    /// An object that has a `collections` array but no `type` validates.
    #[tokio::test]
    async fn validate_collections() {
        let collection: Collection = stac_io::read("examples/collection.json").unwrap();
        let wrapper = json!({
            "collections": [collection]
        });
        wrapper.validate().await.unwrap();
    }
}