stac_validate/
validator.rs

1use crate::{Error, Result};
2use async_recursion::async_recursion;
3use async_trait::async_trait;
4use fluent_uri::Uri;
5use jsonschema::{AsyncRetrieve, Resource, ValidationOptions, Validator as JsonschemaValidator};
6use reqwest::Client;
7use serde::Serialize;
8use serde_json::{Map, Value};
9use stac::{Type, Version};
10use std::collections::HashMap;
11use std::sync::Arc;
12
13const SCHEMA_BASE: &str = "https://schemas.stacspec.org";
14
/// A structure for validating STAC.
///
/// Compiled JSON Schema validators are cached by schema URI, so a schema that has already
/// been built is not built again by the same `Validator`.
pub struct Validator {
    /// Compiled validators, keyed by the URI of the schema they were built from.
    validators: HashMap<Uri<String>, JsonschemaValidator>,
    /// Options used to build a validator for any schema that is not in the cache yet.
    validation_options: ValidationOptions<Arc<dyn referencing::AsyncRetrieve>>,
}
20
/// Schema retriever that wraps the [`reqwest::Client`] used to fetch schemas over HTTP.
#[derive(Debug)]
struct Retriever(Client);
23
24impl Validator {
25    /// Creates a new validator.
26    ///
27    /// # Examples
28    ///
29    /// ```
30    /// use stac_validate::Validator;
31    ///
32    /// #[tokio::main]
33    /// async fn main() {
34    ///     let validator = Validator::new().await.unwrap();
35    /// }
36    /// ```
37    pub async fn new() -> Result<Validator> {
38        let validation_options = jsonschema::async_options();
39        let validation_options = validation_options
40            .with_resources(prebuild_resources().into_iter())
41            .with_retriever(Retriever(
42                Client::builder().user_agent(crate::user_agent()).build()?,
43            ));
44        Ok(Validator {
45            validators: prebuild_validators(&validation_options).await,
46            validation_options,
47        })
48    }
49
50    /// Validates a single value.
51    ///
52    /// # Examples
53    ///
54    /// ```
55    /// use stac::Item;
56    /// use stac_validate::Validate;
57    ///
58    /// #[tokio::main]
59    /// async fn main() {
60    ///     let mut item = Item::new("an-id");
61    ///     item.validate().await.unwrap();
62    /// }
63    /// ```
64    pub async fn validate<T>(&mut self, value: &T) -> Result<()>
65    where
66        T: Serialize,
67    {
68        let value = serde_json::to_value(value)?;
69        let _ = self.validate_value(value).await?;
70        Ok(())
71    }
72
73    /// If you have a [serde_json::Value], you can skip a deserialization step by using this method.
74    #[async_recursion]
75    pub async fn validate_value(&mut self, value: Value) -> Result<Value> {
76        if let Value::Object(object) = value {
77            self.validate_object(object).await.map(Value::Object)
78        } else if let Value::Array(array) = value {
79            self.validate_array(array).await.map(Value::Array)
80        } else {
81            Err(Error::ScalarJson(value))
82        }
83    }
84
85    #[async_recursion]
86    async fn validate_array(&mut self, array: Vec<Value>) -> Result<Vec<Value>> {
87        let mut errors = Vec::new();
88        let mut new_array = Vec::with_capacity(array.len());
89        for value in array {
90            match self.validate_value(value).await {
91                Ok(value) => new_array.push(value),
92                Err(error) => {
93                    if let Error::Validation(e) = error {
94                        errors.extend(e);
95                    } else {
96                        return Err(error);
97                    }
98                }
99            }
100        }
101        if errors.is_empty() {
102            Ok(new_array)
103        } else {
104            Err(Error::Validation(errors))
105        }
106    }
107
108    #[async_recursion]
109    async fn validate_object(
110        &mut self,
111        mut object: Map<String, Value>,
112    ) -> Result<Map<String, Value>> {
113        let r#type = if let Some(r#type) = object.get("type").and_then(|v| v.as_str()) {
114            let r#type: Type = r#type.parse()?;
115            if r#type == Type::ItemCollection {
116                if let Some(features) = object.remove("features") {
117                    let features = self.validate_value(features).await?;
118                    let _ = object.insert("features".to_string(), features);
119                }
120                return Ok(object);
121            }
122            r#type
123        } else {
124            match object.remove("collections") {
125                Some(collections) => {
126                    let collections = self.validate_value(collections).await?;
127                    let _ = object.insert("collections".to_string(), collections);
128                    return Ok(object);
129                }
130                _ => {
131                    return Err(stac::Error::MissingField("type").into());
132                }
133            }
134        };
135
136        let version: Version = object
137            .get("stac_version")
138            .and_then(|v| v.as_str())
139            .map(|v| v.parse::<Version>())
140            .transpose()
141            .unwrap()
142            .ok_or(stac::Error::MissingField("stac_version"))?;
143
144        let uri = build_uri(r#type, &version);
145        let validator = self.validator(uri).await?;
146        let value = Value::Object(object);
147        let errors: Vec<_> = validator.iter_errors(&value).collect();
148        let object = if errors.is_empty() {
149            if let Value::Object(object) = value {
150                object
151            } else {
152                unreachable!()
153            }
154        } else {
155            return Err(Error::from_validation_errors(
156                errors.into_iter(),
157                Some(&value),
158            ));
159        };
160
161        self.validate_extensions(object).await
162    }
163
164    async fn validate_extensions(
165        &mut self,
166        object: Map<String, Value>,
167    ) -> Result<Map<String, Value>> {
168        match object
169            .get("stac_extensions")
170            .and_then(|value| value.as_array())
171            .cloned()
172        {
173            Some(stac_extensions) => {
174                let uris = stac_extensions
175                    .into_iter()
176                    .filter_map(|value| {
177                        if let Value::String(s) = value {
178                            Some(Uri::parse(s).map_err(|(err, _)| err))
179                        } else {
180                            None
181                        }
182                    })
183                    .collect::<std::result::Result<Vec<_>, _>>()?;
184                self.ensure_validators(&uris).await?;
185
186                let mut errors = Vec::new();
187                let value = Value::Object(object);
188                for uri in uris {
189                    let validator = self
190                        .validator_opt(&uri)
191                        .expect("We already ensured they're present");
192                    errors.extend(validator.iter_errors(&value));
193                }
194                if errors.is_empty() {
195                    if let Value::Object(object) = value {
196                        Ok(object)
197                    } else {
198                        unreachable!()
199                    }
200                } else {
201                    Err(Error::from_validation_errors(
202                        errors.into_iter(),
203                        Some(&value),
204                    ))
205                }
206            }
207            _ => Ok(object),
208        }
209    }
210
211    async fn validator(&mut self, uri: Uri<String>) -> Result<&JsonschemaValidator> {
212        self.ensure_validator(&uri).await?;
213        Ok(self.validator_opt(&uri).unwrap())
214    }
215
216    async fn ensure_validators(&mut self, uris: &[Uri<String>]) -> Result<()> {
217        for uri in uris {
218            self.ensure_validator(uri).await?;
219        }
220        Ok(())
221    }
222
223    async fn ensure_validator(&mut self, uri: &Uri<String>) -> Result<()> {
224        if !self.validators.contains_key(uri) {
225            let client = reqwest::Client::new();
226            let response = client.get(uri.as_str()).send().await?.error_for_status()?;
227            let json_data = response.json().await?;
228            let validator = self
229                .validation_options
230                .build(&json_data)
231                .await
232                .map_err(Box::new)?;
233            let _ = self.validators.insert(uri.clone(), validator);
234        }
235        Ok(())
236    }
237
238    fn validator_opt(&self, uri: &Uri<String>) -> Option<&JsonschemaValidator> {
239        self.validators.get(uri)
240    }
241}
242
243#[async_trait]
244impl AsyncRetrieve for Retriever {
245    async fn retrieve(
246        &self,
247        uri: &Uri<String>,
248    ) -> std::result::Result<Value, Box<dyn std::error::Error + Send + Sync>> {
249        let response = self.0.get(uri.as_str()).send().await?.error_for_status()?;
250        let value = response.json().await?;
251        Ok(value)
252    }
253}
254
255fn build_uri(r#type: Type, version: &Version) -> Uri<String> {
256    Uri::parse(format!(
257        "{}{}",
258        SCHEMA_BASE,
259        r#type
260            .spec_path(version)
261            .expect("we shouldn't get here with an item collection")
262    ))
263    .unwrap()
264}
265
266async fn prebuild_validators(
267    validation_options: &ValidationOptions<Arc<dyn referencing::AsyncRetrieve>>,
268) -> HashMap<Uri<String>, JsonschemaValidator> {
269    use Type::*;
270    use Version::*;
271
272    let mut schemas = HashMap::new();
273
274    macro_rules! schema {
275        ($t:expr_2021, $v:expr_2021, $path:expr_2021, $schemas:expr_2021) => {
276            let url = build_uri($t, &$v);
277            let value = serde_json::from_str(include_str!($path)).unwrap();
278            let validator = validation_options.build(&value).await.unwrap();
279            let _ = schemas.insert(url, validator);
280        };
281    }
282
283    schema!(Item, v1_0_0, "schemas/v1.0.0/item.json", schemas);
284    schema!(Catalog, v1_0_0, "schemas/v1.0.0/catalog.json", schemas);
285    schema!(
286        Collection,
287        v1_0_0,
288        "schemas/v1.0.0/collection.json",
289        schemas
290    );
291    schema!(Item, v1_1_0, "schemas/v1.1.0/item.json", schemas);
292    schema!(Catalog, v1_1_0, "schemas/v1.1.0/catalog.json", schemas);
293    schema!(
294        Collection,
295        v1_1_0,
296        "schemas/v1.1.0/collection.json",
297        schemas
298    );
299
300    schemas
301}
302
303fn prebuild_resources() -> Vec<(String, Resource)> {
304    let mut resources = Vec::new();
305
306    macro_rules! resolve {
307        ($url:expr_2021, $path:expr_2021) => {
308            let _ = resources.push((
309                $url.to_string(),
310                Resource::from_contents(serde_json::from_str(include_str!($path)).unwrap()),
311            ));
312        };
313    }
314
315    // General
316    resolve!(
317        "https://geojson.org/schema/Feature.json",
318        "schemas/geojson/Feature.json"
319    );
320    resolve!(
321        "https://geojson.org/schema/Geometry.json",
322        "schemas/geojson/Geometry.json"
323    );
324    resolve!(
325        "http://json-schema.org/draft-07/schema",
326        "schemas/json-schema/draft-07.json"
327    );
328
329    // STAC v1.0.0
330    resolve!(
331        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/basics.json",
332        "schemas/v1.0.0/basics.json"
333    );
334    resolve!(
335        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json",
336        "schemas/v1.0.0/datetime.json"
337    );
338    resolve!(
339        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/instrument.json",
340        "schemas/v1.0.0/instrument.json"
341    );
342    resolve!(
343        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json",
344        "schemas/v1.0.0/item.json"
345    );
346    resolve!(
347        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/licensing.json",
348        "schemas/v1.0.0/licensing.json"
349    );
350    resolve!(
351        "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/provider.json",
352        "schemas/v1.0.0/provider.json"
353    );
354
355    // STAC v1.1.0
356    resolve!(
357        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/bands.json",
358        "schemas/v1.1.0/bands.json"
359    );
360    resolve!(
361        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/basics.json",
362        "schemas/v1.1.0/basics.json"
363    );
364    resolve!(
365        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/common.json",
366        "schemas/v1.1.0/common.json"
367    );
368    resolve!(
369        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/data-values.json",
370        "schemas/v1.1.0/data-values.json"
371    );
372    resolve!(
373        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/datetime.json",
374        "schemas/v1.1.0/datetime.json"
375    );
376    resolve!(
377        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/instrument.json",
378        "schemas/v1.1.0/instrument.json"
379    );
380    resolve!(
381        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/item.json",
382        "schemas/v1.1.0/item.json"
383    );
384    resolve!(
385        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/licensing.json",
386        "schemas/v1.1.0/licensing.json"
387    );
388    resolve!(
389        "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/provider.json",
390        "schemas/v1.1.0/provider.json"
391    );
392
393    resources
394}
395
#[cfg(test)]
mod tests {
    use super::Validator;
    use crate::Validate;
    use serde_json::json;
    use stac::{Collection, Item};

    /// An item read from `examples/simple-item.json` validates.
    #[tokio::test]
    async fn validate_simple_item() {
        let simple: Item = stac_io::read("examples/simple-item.json").unwrap();
        simple.validate().await.unwrap();
    }

    /// An item read from `examples/extended-item.json` validates from within a tokio test.
    #[tokio::test]
    async fn validate_inside_tokio_runtime() {
        let extended: Item = stac_io::read("examples/extended-item.json").unwrap();
        extended.validate().await.unwrap();
    }

    /// A JSON array of one hundred freshly created items validates with a single validator.
    #[tokio::test]
    async fn validate_array() {
        let mut items = Vec::with_capacity(100);
        for index in 0..100 {
            let item = Item::new(format!("item-{index}"));
            items.push(serde_json::to_value(item).unwrap());
        }
        let mut validator = Validator::new().await.unwrap();
        validator.validate(&items).await.unwrap();
    }

    /// An object without `type` but with a `collections` array validates.
    #[tokio::test]
    async fn validate_collections() {
        let collection: Collection = stac_io::read("examples/collection.json").unwrap();
        let wrapper = json!({
            "collections": [collection]
        });
        wrapper.validate().await.unwrap();
    }
}