json_ns/
lib.rs

1//! The reference implementation for JSON-NS, a small and basic subset of JSON-LD. See the [blog
2//! post] for what this is and why it exists.
3//!
4//!  [blog post]: https://stephank.nl/p/2018-10-20-a-proposal-for-standardising-a-subset-of-json-ld.html
5//!
6//! This implementation uses the `serde_json` crate types to represent JSON values. Doing basic
7//! processing involves creating a `Processor`, which holds some optional configuration, and giving
8//! it a `Value` to process:
9//!
10//! ```rust
11//! #[macro_use]
12//! extern crate serde_json as json;
13//! extern crate json_ns;
14//!
15//! use json_ns::Processor;
16//!
17//! fn main() {
18//!     // Some example input.
19//!     let input = json!({
20//!         "@context": {
21//!             "foo": "http://example.com/ns#"
22//!         },
23//!         "foo:hello": "world"
24//!     });
25//!
26//!     // Process the document, and use `bar` instead as the output prefix.
27//!     let output = Processor::new()
28//!         .add_rule("bar", "http://example.com/ns#")
29//!         .process_value(&input);
30//!
31//!     // Check that the output is what we expected.
32//!     assert_eq!(output, json!({
33//!         "bar:hello": "world"
34//!     }));
35//! }
36//! ```
37//!
38//! Without the processor configuration, this code can be even shorter:
39//!
40//! ```rust,ignore
41//! let output = Processor::new().process_value(&input);
42//! ```
43//!
44//! In this case, the output document contains a property named `http://example.com/ns#hello`.
45//!
46//! Often, the bulk of the properties you expect are in a single namespace. In this case, it may be
47//! useful to set a default namespace on the output, for which properties are not prefixed at all:
48//!
49//! ```rust,ignore
50//! processor.add_rule("", "http://example.com/ns#");
51//! ```
52//!
53//! The output then contains a property named just `hello`. This is especially useful when passing
54//! the value on to `serde_json::from_value` to parse it into a struct that derives `Deserialize`.
55//!
56//! Note that the output should *not* itself be considered a JSON-NS document. Running input
57//! through a processor twice may produce unexpected results.
58//!
59//! That should cover the basics. More details can be found in the documentation of the structs,
60//! fields and functions.
61
62#[macro_use]
63extern crate cfg_if;
64extern crate serde_json as json;
65
66cfg_if! {
67    if #[cfg(test)] {
68        extern crate colored;
69        mod test;
70    }
71}
72
73use json::Value;
74use std::borrow::Cow;
75use std::collections::BTreeMap;
76use std::slice::Iter;
77
78type Map = json::Map<String, Value>;
79
80/// Iterator used to walk a value that may or may not be an array.
81enum OneOrMany<'a> {
82  None,
83  One(&'a Value),
84  Many(Iter<'a, Value>),
85}
86
87impl<'a> From<&'a Value> for OneOrMany<'a> {
88    fn from(value: &'a Value) -> Self {
89        match *value {
90            Value::Array(ref arr) => OneOrMany::Many(arr.iter()),
91            ref value => OneOrMany::One(value),
92        }
93    }
94}
95
96impl<'a> Iterator for OneOrMany<'a> {
97    type Item = &'a Value;
98    fn next(&mut self) -> Option<&'a Value> {
99        match *self {
100            OneOrMany::None => None,
101            OneOrMany::One(value) => {
102                *self = OneOrMany::None;
103                Some(value)
104            },
105            OneOrMany::Many(ref mut iter) => {
106                iter.next()
107            },
108        }
109    }
110}
111
112/// Structure holding the current context to interpret a document with.
113///
114/// An instance of this struct is part of the `Processor`, which can be modified to provide an
115/// external context to interpret documents with. Such a custom context can also be created from
116/// JSON using one of the `From` implementations.
117#[derive(Clone,Debug,Default)]
118pub struct Context {
119    /// The default namespace, for properties that are not a keyword, CURIE, or IRI.
120    pub ns: Option<String>,
121    /// The Default language for internationalised properties that don't specify one. The empty
122    /// string when not defined.
123    pub lang: String,
124    /// Map of defined CURIE prefixes to their base IRIs.
125    pub prefixes: BTreeMap<String, String>,
126    /// Map of defined aliases by their literal property names.
127    pub aliases: BTreeMap<String, String>,
128    /// Map of defined container mappings by their literal property names.
129    pub container: BTreeMap<String, String>,
130}
131
132impl Context {
133    /// An alias for `Context::default()`.
134    pub fn new() -> Context {
135        Context::default()
136    }
137
138    /// Merge an `@context` value into this structure.
139    pub fn merge_value(&mut self, value: &Value) {
140        for value in OneOrMany::from(value) {
141            match *value {
142                Value::Null => {
143                    // A null clears the context.
144                    *self = Context::default();
145                },
146                Value::Object(ref object) => {
147                    // An object is merged into the context.
148                    self.merge_object(object);
149                },
150                _ => {
151                    // Captures remote context references, but also anything else we don't understand.
152                    // These are simply ignored.
153                },
154            }
155        }
156    }
157
158    /// Merge an `@context` object into this structure.
159    pub fn merge_object(&mut self, object: &Map) {
160        for (key, value) in object {
161            if is_keyword(key) {
162                match key.as_str() {
163                    "@vocab" => {
164                        // Set the default namespace. May be null to clear it.
165                        if let Some(ns) = value.as_str().filter(|s| is_absolute_iri(s)) {
166                            self.ns = Some(ns.to_owned());
167                        } else if value.is_null() {
168                            self.ns = None;
169                        }
170                    },
171                    "@language" => {
172                        // Set the default language. May be null to clear it.
173                        if let Some(lang) = value.as_str() {
174                            self.lang = lang.to_owned();
175                        } else if value.is_null() {
176                            self.lang = "".to_owned();
177                        }
178                    },
179                    _ => {},
180                }
181            } else {
182                match *value {
183                    Value::String(ref string) => {
184                        // Define a namespace.
185                        if is_curie_prefix(key) && is_absolute_iri(string) {
186                            self.prefixes.insert(key.to_owned(), string.to_owned());
187                        }
188                    },
189                    Value::Object(ref object) => {
190                        // Look for an alias.
191                        let alias = object.get("@id")
192                            .and_then(Value::as_str)
193                            .filter(|string| !is_keyword(string));
194                        if let Some(alias) = alias {
195                            self.aliases.insert(key.to_owned(), alias.to_owned());
196                        }
197
198                        // Look for a container mapping.
199                        let container = object.get("@container")
200                            .and_then(Value::as_str);
201                        if let Some(container) = container {
202                            self.container.insert(key.to_owned(), container.to_owned());
203                        }
204                    },
205                    Value::Null => {
206                        // A null value is used to clear whatever was defined.
207                        self.prefixes.remove(key);
208                        self.aliases.remove(key);
209                        self.container.remove(key);
210                    },
211                    _ => {},
212                }
213            }
214        }
215    }
216
217    /// Expand a name according to this context.
218    ///
219    /// A name may be an absolute IRI, a CURIE within a defined namespace, or a name in the default
220    /// namespace, otherwise `None` is returned (and the property or value should be dropped).
221    pub fn expand_name<'a>(&self, name: &'a str) -> Option<Cow<'a, str>> {
222        if name.starts_with('@') {
223            return None;
224        }
225
226        let mut parts = name.splitn(2, ':');
227        let prefix = parts.next().unwrap();
228        if let Some(suffix) = parts.next() {
229            if let Some(base) = self.prefixes.get(prefix) {
230                // A CURIE within a defined namespace.
231                Some(Cow::from(format!("{}{}", base, suffix)))
232            } else {
233                // An absolute IRI in some other scheme.
234                Some(Cow::from(name))
235            }
236        } else if let Some(ref base) = self.ns {
237            // A term in the default namespace.
238            Some(Cow::from(format!("{}{}", base, name)))
239        } else {
240            None
241        }
242    }
243}
244
245impl<'a> From<&'a Value> for Context {
246    fn from(value: &'a Value) -> Context {
247        let mut context = Context::default();
248        context.merge_value(value);
249        context
250    }
251}
252
253impl<'a> From<&'a Map> for Context {
254    fn from(object: &'a Map) -> Context {
255        let mut context = Context::default();
256        context.merge_object(object);
257        context
258    }
259}
260
/// The target context that an output document is reworded to.
///
/// A `Processor` holds one of these; add rules to it to control how absolute IRIs are shortened
/// in the output.
///
/// Without any rules, nothing is shortened and the output document contains only absolute IRIs.
#[derive(Clone, Debug, Default)]
pub struct TargetContext {
    /// Pairs of a CURIE prefix and its base IRI, in order of precedence.
    ///
    /// Before an absolute IRI is written to the output document, this list is searched for the
    /// first rule whose base IRI the absolute IRI starts with. If one is found, a CURIE with that
    /// rule's prefix is written instead.
    ///
    /// A rule with an empty string as its prefix represents the default namespace of the output
    /// document: matching IRIs are written without any prefix at all.
    pub rules: Vec<(String, String)>,
}

impl TargetContext {
    /// Create a target context without rules. Equivalent to `TargetContext::default()`.
    pub fn new() -> TargetContext {
        Default::default()
    }

    /// Append a rule mapping `base` to `prefix`. Returns `self` so calls can be chained.
    pub fn add_rule(&mut self, prefix: &str, base: &str) -> &mut Self {
        let rule = (String::from(prefix), String::from(base));
        self.rules.push(rule);
        self
    }

    /// Compact an absolute IRI according to this context.
    ///
    /// The first rule whose base is a prefix of `iri` wins. The result borrows from `iri` unless
    /// a CURIE had to be assembled.
    pub fn compact_iri<'a>(&self, iri: &'a str) -> Cow<'a, str> {
        let matched = self
            .rules
            .iter()
            .find(|rule| iri.starts_with(rule.1.as_str()));

        match matched {
            // No match, output the absolute IRI as-is.
            None => Cow::Borrowed(iri),
            Some((prefix, base)) => {
                // `iri` starts with `base`, so this index is a valid character boundary.
                let suffix = &iri[base.len()..];
                if prefix.is_empty() {
                    // Matched the default namespace: the bare suffix is the name.
                    Cow::Borrowed(suffix)
                } else {
                    // Matched a prefixed namespace: generate a CURIE.
                    Cow::Owned(format!("{}:{}", prefix, suffix))
                }
            }
        }
    }
}
310
311/// A document processor.
312///
313/// This structure holds configuration for processing documents. The defaults are fine if the
314/// output document should contain only absolute IRIs, but usually you want to set some namespaces
315/// for the output document in the `TargetContext` contained within.
316#[derive(Clone,Debug,Default)]
317pub struct Processor {
318    /// External context added to the document. Defaults to an empty context, so only inline
319    /// contexts in the document itself are used.
320    pub context: Context,
321    /// Target context to reword the document to. Defaults to an empty context, so the result will
322    /// contain only absolute IRIs for all properties and types.
323    pub target: TargetContext,
324}
325
326impl Processor {
327    /// Alias for `Processor::default()`.
328    pub fn new() -> Processor {
329        Processor::default()
330    }
331
332    /// A short-hand for adding a rule to the contained `TargetContext`.
333    pub fn add_rule(&mut self, prefix: &str, base: &str) -> &mut Self {
334        self.target.add_rule(prefix, base);
335        self
336    }
337
338    /// Process a value, using the configuration in this struct.
339    pub fn process_value(&self, value: &Value) -> Value {
340        self.process_value_inner(value, &self.context)
341    }
342
343    /// Process an object, using the configuration in this struct.
344    pub fn process_object(&self, object: &Map) -> Map {
345        self.process_object_inner(object, &self.context)
346    }
347
348    /// Process a value with a local context.
349    fn process_value_inner(&self, value: &Value, context: &Context) -> Value {
350        match *value {
351            Value::Array(ref array) => {
352                let array = array.iter()
353                    .map(|value| self.process_value_inner(value, context))
354                    .collect::<Vec<_>>();
355                Value::Array(array)
356            },
357            Value::Object(ref object) => {
358                Value::Object(self.process_object_inner(object, context))
359            },
360            ref value => value.clone(),
361        }
362    }
363
364    /// Process an object with a local context.
365    fn process_object_inner(&self, object: &Map, context: &Context) -> Map {
366        // Extend the active context with the local context, if present.
367        let local_context = object.get("@context").map(|value| {
368            let mut context = context.clone();
369            context.merge_value(value);
370            context
371        });
372        let context = local_context.as_ref().unwrap_or(context);
373
374        let mut result = Map::with_capacity(object.len());
375        for (key, value) in object {
376            if key.starts_with('@') {
377                // A keyword property.
378                match key.as_str() {
379                    "@id" => {
380                        // Document ID, must be an absolute IRI.
381                        if let Some(iri) = value.as_str().filter(|s| is_absolute_iri(s)) {
382                            result.insert(key.clone(), Value::String(iri.to_owned()));
383                        }
384                    },
385                    "@type" => {
386                        // Document type, a string or array of strings, each of which expands to an
387                        // absolute IRI. (We don't support `@type` on values, like JSON-LD.)
388                        let value = OneOrMany::from(value)
389                            .filter_map(|value| value.as_str())
390                            .filter_map(|string| context.expand_name(string))
391                            .map(|iri| self.target.compact_iri(&iri).into_owned())
392                            .map(Value::String)
393                            .collect::<Vec<_>>();
394                        if !value.is_empty() {
395                            result.insert(key.clone(), Value::Array(value));
396                        }
397                    },
398                    _ => {
399                        // Ignore `@context` (already processed) and other unrecognized keywords.
400                    },
401                }
402
403                continue;
404            }
405
406            // Look for an alias.
407            let resolved = context.aliases.get(key).map(String::as_str).unwrap_or(key);
408
409            // Resolve in the current context.
410            let resolved = match context.expand_name(resolved) {
411                Some(iri) => self.target.compact_iri(&iri).into_owned(),
412                None => continue,
413            };
414
415            // Look for a container mapping of the original property name.
416            result.insert(resolved, match context.container.get(key).map(String::as_str) {
417                Some("@language") => {
418                    // An internationalised property.
419                    match *value {
420                        Value::String(_) => {
421                            // Normalise a string value to a language map with a single entry for
422                            // the context default language.
423                            let mut object = Map::with_capacity(1);
424                            object.insert(context.lang.clone(), value.clone());
425                            Value::Object(object)
426                        },
427                        Value::Object(ref object) => {
428                            // Filter non-string values from the object.
429                            let object = object.iter()
430                                .filter(|(_, value)| value.is_string())
431                                .map(|(key, value)| (key.clone(), value.clone()))
432                                .collect();
433                            Value::Object(object)
434                        },
435                        _ => {
436                            // Drop unrecognised values.
437                            continue;
438                        },
439                    }
440                },
441                _ => {
442                    // No or unrecognized container mapping, which we treat as a normal value.
443                    // Expand it by recursing.
444                    self.process_value_inner(value, context)
445                },
446            });
447        }
448
449        result
450    }
451}
452
/// Whether the input is a keyword, i.e. its first character is `@`.
fn is_keyword(input: &str) -> bool {
    input.chars().next() == Some('@')
}
457
/// Whether the input is a valid absolute IRI: it contains a colon and is not a keyword.
fn is_absolute_iri(input: &str) -> bool {
    if input.starts_with('@') {
        return false;
    }
    input.find(':').is_some()
}
462
/// Whether the input is a valid CURIE prefix: non-empty, not a keyword, and free of colons.
fn is_curie_prefix(input: &str) -> bool {
    match input.chars().next() {
        // Empty strings and keywords can never be prefixes.
        None | Some('@') => false,
        Some(_) => !input.contains(':'),
    }
}